% Kraus, Carmel, Keidar -- "Fishing in the stream: Similarity search over endless data",
% conference paper in the IEEE Big Data 2017 proceedings.
% `month` follows the conference dates recorded in `note` (11--14 December 2017).
% NOTE(review): the exported record carried a 1 July 2017 date, which looks like a
% placeholder -- confirm the publication month against the publisher record.
% `series` is intentionally absent: it only repeated `booktitle`, and styles would print it twice.
@inproceedings{0ed8e59831bd4672a8127e7076a60a18,
  author    = "Kraus, Naama and Carmel, David and Keidar, Idit",
  title     = "Fishing in the stream: Similarity search over endless data",
  booktitle = "Proceedings - 2017 IEEE International Conference on Big Data, Big Data 2017",
  editor    = "Nie, Jian-Yun and Obradovic, Zoran and Suzumura, Toyotaro and Ghosh, Rumi and Nambiar, Raghunath and Wang, Chonggang and Zang, Hui and Baeza-Yates, Ricardo and Hu, Xiaohua and Kepner, Jeremy and Cuzzocrea, Alfredo and Tang, Jian and Toyoda, Masashi",
  pages     = "964--969",
  year      = "2017",
  month     = dec,
  doi       = "10.1109/BigData.2017.8258016",
  language  = "English",
  keywords  = "Dynamic popularity, Locality sensitive hashing, Retention policy, Similarity search, Stream search",
  abstract  = "Similarity search is the task of retrieving data items that are similar to a given query. In this paper, we introduce the time-sensitive notion of similarity search over endless data-streams (SSDS), which takes into account data quality and temporal characteristics in addition to similarity. SSDS is challenging as it needs to process unbounded data, while computation resources are bounded. We propose Stream-LSH, a randomized SSDS algorithm that bounds the index size by retaining items according to their freshness, quality, and dynamic popularity attributes. We show that Stream-LSH increases recall when searching for similar items compared to alternative approaches using the same space capacity.",
  note      = "Publisher Copyright: {\textcopyright} 2017 IEEE.; 5th IEEE International Conference on Big Data, Big Data 2017 ; Conference date: 11-12-2017 Through 14-12-2017",
}