@inproceedings{9b2d8b260bc841608f4b84717010bd38,
title = "Evaluating D-MERIT of Partial-annotation on Information Retrieval",
abstract = "Retrieval models are often evaluated on partially-annotated datasets. Each query is mapped to a few relevant texts and the remaining corpus is assumed to be irrelevant. As a result, models that successfully retrieve falsely labeled negatives are punished in evaluation. Unfortunately, completely annotating all texts for every query is not resource efficient. In this work, we show that using partially-annotated datasets in evaluation can paint a distorted picture. We curate D-MERIT, a passage retrieval evaluation set from Wikipedia, aspiring to contain all relevant passages for each query. Queries describe a group (e.g., “journals about linguistics”) and relevant passages are evidence that entities belong to the group (e.g., a passage indicating that Language is a journal about linguistics). We show that evaluating on a dataset containing annotations for only a subset of the relevant passages might result in misleading ranking of the retrieval systems and that as more relevant texts are included in the evaluation set, the rankings converge. We propose our dataset as a resource for evaluation and our study as a recommendation for balance between resource-efficiency and reliable evaluation when annotating evaluation sets for text retrieval. Our dataset can be downloaded from https://D-MERIT.github.io.",
author = "Royi Rassin and Yaron Fairstein and Oren Kalinsky and Guy Kushilevitz and Nachshon Cohen and Alexander Libov and Yoav Goldberg",
note = "Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 2024 Conference on Empirical Methods in Natural Language Processing, EMNLP 2024 ; Conference date: 12-11-2024 Through 16-11-2024",
year = "2024",
month = jan,
day = "1",
language = "American English",
series = "EMNLP 2024 - 2024 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference",
publisher = "Association for Computational Linguistics (ACL)",
pages = "2913--2932",
editor = "Yaser Al-Onaizan and Mohit Bansal and Yun-Nung Chen",
booktitle = "EMNLP 2024 - 2024 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference",
address = "United States",
}