% Conference paper: "CleanEr: Interactive, Query-Guided Error Mitigation for
% Data Cleaning Systems" (Schreiber and Amsterdamer), ICDE 2024 proceedings.
% The address field holds the country given by the export, not a publisher city.
@inproceedings{09b69b4012fd4923a03b18530bdccd4e,
  author    = {Schreiber, Ran and Amsterdamer, Yael},
  title     = {{CleanEr}: Interactive, Query-Guided Error Mitigation for Data Cleaning Systems},
  booktitle = {Proceedings - 2024 {IEEE} 40th International Conference on Data Engineering, {ICDE} 2024},
  series    = {Proceedings - International Conference on Data Engineering},
  pages     = {5421--5424},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2024},
  doi       = {10.1109/icde60146.2024.00418},
  language  = {English},
  keywords  = {data cleaning, provenance, uncertain databases},
  abstract  = {A key challenge in data cleaning is estimating which of the tuples in a given database are correct and which are not. However, the output of such systems typically includes both false positives and false negatives, i.e., incorrect tuples labeled as correct and vice versa. When queries are performed over the output of such cleaning systems, cleaning errors may have an intricate impact on the query results. We introduce CleanEr, a generic framework that is used on top of existing data cleaning systems and that assists users in identifying the impact of potential cleaning errors on query results, and in deciding accordingly whether and how to proceed with the cleaning. We introduce novel indicators reflecting the current uncertainty with respect to the tuples in the query result, as well as the effect of each relevant input tuple on this uncertainty. We design and implement efficient algorithms for computing these indicators in CleanEr. Based on these indicators, CleanEr helps the data analysts decide whether to trust the query output and guides them in further cleaning of relevant parts of the data through an interactive process. We propose to demonstrate CleanEr using NELL, a large database extracted from the Web.},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 40th IEEE International Conference on Data Engineering, ICDE 2024 ; Conference date: 13-05-2024 Through 17-05-2024},
}