% Conference paper "Compressed hierarchical clustering" (booktitle: Proceedings - DCC 2018).
% The doi field holds the bare DOI (no resolver prefix); language and address are
% given in English/ASCII so the entry is safe for classic 8-bit BibTeX.
% NOTE(review): address holds a country, not the publisher's city -- confirm the city and refine.
@inproceedings{c09cf21bee5644c1b4189ce616994bcf,
  author    = {Baruch, Gilad and Shapira, Dana and Klein, Shmuel T.},
  title     = {Compressed hierarchical clustering},
  booktitle = {Proceedings - DCC 2018},
  editor    = {Bilgin, Ali and Storer, James A. and Serra-Sagrista, Joan and Marcellin, Michael W.},
  series    = {Data Compression Conference Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {399},
  year      = {2018},
  month     = jul,
  day       = {19},
  doi       = {10.1109/dcc.2018.00052},
  language  = {English},
  keywords  = {Hamming distance, Hierarchical Clustering, Run length encoding},
  abstract  = {Hierarchical Clustering is widely used in Machine Learning and Data Mining. It stores bit-vectors in the nodes of a k-ary tree, usually without trying to compress them. We suggest a double usage of the {\sf xor}ing operations defining the Hamming distance used in the clustering process, extending it also to be used to transform the vector in one node into a more compressible form, as a function of the vector in the parent node. Compression is then achieved by run-length encoding, followed by optional Huffman coding, and we show how the compressed file may be processed directly, without decompression.},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 2018 Data Compression Conference, DCC 2018 ; Conference date: 27-03-2018 Through 30-03-2018},
}