% Conference paper: "Selecting Sub-tables for Data Exploration" (ICDE 2023 proceedings).
% - doi holds the bare DOI (no resolver prefix); styles add the resolver themselves.
% - language and address were exported in Arabic and are stored here in English.
% - NOTE(review): address currently holds a country; BibTeX convention is the
%   publisher's city. Confirm the city and replace if known.
@inproceedings{604bcbd3dd7149f7b9d6306dba87c935,
  author    = {Amsterdamer, Yael and Davidson, Susan B. and Milo, Tova and Razmadze, Kathy and Somech, Amit},
  title     = {Selecting Sub-tables for Data Exploration},
  booktitle = {Proceedings - 2023 IEEE 39th International Conference on Data Engineering, ICDE 2023},
  series    = {Proceedings - International Conference on Data Engineering},
  pages     = {2496--2509},
  year      = {2023},
  publisher = {IEEE Computer Society},
  address   = {United States},
  doi       = {10.1109/ICDE55515.2023.00192},
  language  = {English},
  keywords  = {Interactive data exploration and discovery},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 39th IEEE International Conference on Data Engineering, ICDE 2023 ; Conference date: 03-04-2023 Through 07-04-2023},
  abstract  = {Data scientists frequently examine the raw content of large tables when exploring an unknown dataset. In such cases, small subsets of the full tables (sub-tables) that accurately capture table contents are useful. We present a framework which, given a large data table T, creates a sub-table of small, fixed dimensions by selecting a subset of T's rows and projecting them over a subset of T's columns. The question is: Which rows and columns should be selected to yield an informative sub-table? Our first contribution is an informativeness metric for sub-tables with two complementary dimensions: cell coverage, which measures how well the sub-table captures prominent data patterns in T, and diversity. We use association rules as the patterns captured by sub-tables, and show that computing optimal sub-tables directly using this metric is infeasible. We then develop an efficient algorithm that indirectly accounts for association rules using table embedding. The resulting framework produces sub-tables for the full table as well as for the results of queries over the table, enabling the user to quickly understand results and determine subsequent queries. Experimental results show that high-quality sub-tables can be efficiently computed, and verify the soundness of our metrics as well as the usefulness of selected sub-tables through user studies.},
}