% Workshop paper by Avraham and Goldberg in the RepEval 2016 proceedings (ACL 2016).
% - doi holds the bare identifier; styles add the resolver prefix themselves.
% - month/day follow the conference date recorded in the note field (07-08-2016,
%   read here as 7 August 2016 -- TODO confirm against the publisher record);
%   the earlier value of 1 January contradicted that note.
% - Acronyms in booktitle are braced so that recasing styles keep their capitals.
@inproceedings{b6838d7d12484cd585ec2387a544d1b1,
  author    = {Avraham, Oded and Goldberg, Yoav},
  title     = {Improving reliability of word similarity evaluation by redesigning annotation task and performance measure},
  booktitle = {Proceedings of the 1st Workshop on Evaluating Vector-Space Representations for {NLP}, {RepEval} 2016 at the 54th Annual Meeting of the Association for Computational Linguistics, {ACL} 2016},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  pages     = {106--110},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2016},
  month     = aug,
  day       = {7},
  doi       = {10.18653/v1/w16-2519},
  language  = {American English},
  abstract  = {We suggest a new method for creating and using gold-standard datasets for word similarity evaluation. Our goal is to improve the reliability of the evaluation, and we do this by redesigning the annotation task to achieve higher inter-rater agreement, and by defining a performance measure which takes the reliability of each annotation decision in the dataset into account.},
  note      = {Publisher Copyright: {\textcopyright} 2016 Proceedings of the Annual Meeting of the Association for Computational Linguistics. All Rights Reserved.; 1st Workshop on Evaluating Vector-Space Representations for NLP, RepEval 2016 at the 54th Annual Meeting of the Association for Computational Linguistics, ACL 2016 ; Conference date: 07-08-2016},
}