% NEXUS paper, SIGMOD 2023 Companion proceedings (pp. 171--174).
% Conventions used in this entry: the DOI is stored bare (no resolver prefix),
% the acronym in the title is brace-protected against style recasing, and
% author names use the unambiguous "Last, First" form.
@inproceedings{71e1da98b2884a158080545846d800fe,
  author    = {Youngmann, Brit and Cafarella, Michael and Moskovitch, Yuval and Salimi, Babak},
  title     = {{NEXUS}: On Explaining Confounding Bias},
  booktitle = {SIGMOD 2023 - Companion of the 2023 ACM/SIGMOD International Conference on Management of Data},
  series    = {Proceedings of the ACM SIGMOD International Conference on Management of Data},
  publisher = {Association for Computing Machinery},
  address   = {United States},
  year      = {2023},
  month     = jun,
  day       = {4},
  pages     = {171--174},
  doi       = {10.1145/3555041.3589728},
  language  = {English},
  keywords  = {aggregated SQL queries, confounding bias, knowledge graphs},
  abstract  = {When analyzing large datasets, analysts are often interested in the explanations for unexpected results produced by their queries. In this work, we focus on aggregate SQL queries that expose correlations in the data. A major challenge that hinders the interpretation of such queries is confounding bias, which can lead to an unexpected association between variables. For example, a SQL query computes the average Covid-19 death rate in each country, may expose a puzzling correlation between the country and the death rate. In this work, we demonstrate NEXUS, a system that generates explanations in terms of a set of potential confounding variables that explain the unexpected correlation observed in a query. NEXUS mines candidate confounding variables from external sources since, in many real-life scenarios, the explanations are not solely contained in the input data. For instance, NEXUS might extract data about factors explaining the association between countries and the Covid-19 death rate, such as information about countries' economies and health outcomes. We will demonstrate the utility of NEXUS for investigating unexpected query results by interacting with the SIGMOD'23 participants, who will act as data analysts.},
  note      = {Publisher Copyright: {\textcopyright} 2023 ACM.; 2023 ACM/SIGMOD International Conference on Management of Data, SIGMOD 2023 ; Conference date: 18-06-2023 Through 23-06-2023},
}