% Conference paper in the CIKM 2014 proceedings (Carmel, Mejer, Pinter, Szpektor).
% Field notes:
%   - eventtitle / eventdate are biblatex fields that hold the conference name
%     and its dates (3 to 7 November 2014); styles that do not know these
%     fields ignore them, so they no longer leak into the printed reference
%     the way a free-form note does.
%   - day and copyright are non-standard bookkeeping fields kept for
%     completeness; unknown fields are silently ignored by styles.
%   - Acronyms in booktitle are brace-protected so no style can recase them.
@inproceedings{1a30f8a5fd5f41f68b46ed1a75965980,
  author     = {Carmel, David and Mejer, Avihai and Pinter, Yuval and Szpektor, Idan},
  title      = {Improving term weighting for community question answering search using syntactic analysis},
  booktitle  = {{CIKM} 2014 - Proceedings of the 2014 {ACM} International Conference on Information and Knowledge Management},
  year       = {2014},
  month      = nov,
  day        = {3},
  pages      = {351--360},
  doi        = {10.1145/2661829.2661901},
  language   = {american},
  keywords   = {Community question answering, Dependency parsing, Learning to rank, Part-of-speech tagging, Term weighting},
  abstract   = {Query term weighting is a fundamental task in information retrieval and most popular term weighting schemes are primarily based on statistical analysis of term occurrences within the document collection. In this work we study how term weighting may benefit from syntactic analysis of the corpus. Focusing on Community-based Question Answering (CQA) sites, we take into account the syntactic function of the terms within CQA texts as an important factor affecting their relative importance for retrieval. We analyze a large log of web queries that landed on Yahoo Answers site, showing a strong deviation between the tendencies of different document words to appear in a landing (click-through) query given their syntactic function. To this end, we propose a novel term weighting method that makes use of the syntactic information available for each query term occurrence in the document, on top of term occurrence statistics. The relative importance of each feature is learned via a learning to rank algorithm that utilizes a click-through query log. We examine the new weighting scheme using manual evaluation based on editorial data and using automatic evaluation over the query log. Our experimental results show consistent improvement in retrieval when syntactic information is taken into account.},
  eventtitle = {23rd {ACM} International Conference on Information and Knowledge Management, {CIKM} 2014},
  eventdate  = {2014-11-03/2014-11-07},
  copyright  = {Copyright 2014 ACM},
}