% Conference paper: Gershtein, Milo, Morami, Novgorodov (SIGMOD 2020).
% Cleaned-up portal export: the language and address values were emitted in
% the exporting site's Arabic UI locale and are given here in English; the
% address value is the country exactly as exported (not a publisher city).
% Acronyms in booktitle/series are brace-protected against style recasing.
@inproceedings{caa155d66f974bf4bb026f4c56b28d2d,
  author    = {Gershtein, Shay and Milo, Tova and Morami, Gefen and Novgorodov, Slava},
  title     = {Minimization of Classifier Construction Cost for Search Queries},
  booktitle = {{SIGMOD} 2020 - Proceedings of the 2020 {ACM} {SIGMOD} International Conference on Management of Data},
  series    = {Proceedings of the {ACM} {SIGMOD} International Conference on Management of Data},
  pages     = {1351--1365},
  publisher = {Association for Computing Machinery},
  address   = {United States},
  year      = {2020},
  month     = jun,
  day       = {14},
  doi       = {10.1145/3318464.3389755},
  language  = {English},
  keywords  = {classifiers, classifiers construction cost, e-commerce, search queries},
  abstract  = {Search over massive sets of items is the cornerstone of many modern applications. Users express a set of properties and expect the system to retrieve qualifying items. A common difficulty, however, is that the information on whether an item satisfies the search criteria is not explicitly recorded in the repository. Instead, it may be general knowledge or ``hidden'' in a picture/description, leading to incomplete search results. To overcome this problem, companies build dedicated classifiers that determine which items satisfy the given criteria. However, building classifiers requires volumes of high-quality labeled training data. Since the costs of training classifiers for different subsets of properties can vastly differ, the choice of which classifiers to train has great monetary significance. The goal of our research is to devise effective algorithms to choose which classifiers one should train to address a given query load while minimizing the cost. Previous work considered a simplified model with uniform classifier costs, and queries with two properties. We remove these restrictions in our model. We prove NP-hard inapproximability bounds and devise several algorithms with approximation guarantees. Moreover, we identify a common special case for which we provide an exact algorithm. Our experiments, performed over real-life datasets, demonstrate the effectiveness and efficiency of our algorithms.},
  note      = {Publisher Copyright: {\textcopyright} 2020 Association for Computing Machinery.; 2020 ACM SIGMOD International Conference on Management of Data, SIGMOD 2020 ; Conference date: 14-06-2020 Through 19-06-2020},
}