% Conference paper (EACL 2021 proceedings): document set expansion via neural positive-unlabeled learning.
@inproceedings{bbb695f49eb7453599ddf0b0df5a6e15,
  author    = {Jacovi, Alon and Niu, Gang and Goldberg, Yoav and Sugiyama, Masashi},
  title     = {Scalable evaluation and improvement of document set expansion via neural positive-unlabeled learning},
  booktitle = {{EACL} 2021 - 16th Conference of the European Chapter of the Association for Computational Linguistics, Proceedings of the Conference},
  pages     = {581--592},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2021},
  month     = jan,
  day       = {1},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2021 Association for Computational Linguistics; 16th Conference of the European Chapter of the Association for Computational Linguistics, EACL 2021; Conference date: 19-04-2021 Through 23-04-2021},
  abstract  = {We consider the situation in which a user has collected a small set of documents on a cohesive topic, and they want to retrieve additional documents on this topic from a large collection. Information Retrieval (IR) solutions treat the document set as a query, and look for similar documents in the collection. We propose to extend the IR approach by treating the problem as an instance of positive-unlabeled (PU) learning---i.e., learning binary classifiers from only positive (the query documents) and unlabeled (the results of the IR engine) data. Utilizing PU learning for text with big neural networks is a largely unexplored field. We discuss various challenges in applying PU learning to the setting, showing that the standard implementations of state-of-the-art PU solutions fail. We propose solutions for each of the challenges and empirically validate them with ablation tests. We demonstrate the effectiveness of the new method using a series of experiments of retrieving PubMed abstracts adhering to fine-grained topics, showing improvements over the common IR solution and other baselines.},
}