% SIGIR 2013 conference paper introducing "sham" documents: Web pages in the
% grey area between ham and spam, annotated on the ClueWeb09 benchmark.
@inproceedings{213e8bfae5a74f52bb609697ff2b59c3,
  author    = {Raiber, Fiana and Collins-Thompson, Kevyn and Kurland, Oren},
  title     = {Shame to be Sham: Addressing content-based grey hat search engine optimization},
  booktitle = {SIGIR 2013 - Proceedings of the 36th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {1013--1016},
  year      = {2013},
  doi       = {10.1145/2484028.2484135},
  isbn      = {9781450320344},
  language  = {English},
  keywords  = {Search engine optimization, Sham, Spam},
  abstract  = {We present an initial study identifying a form of content-based grey hat search engine optimization, in which a Web page contains both potentially relevant content and manipulated content: we call such pages sham documents, because they lie in the grey area between 'ham' (clearly normal) and 'spam' (clearly fake). Sham documents are often ranked artificially high in response to certain queries, but also may contain some useful information and cannot be considered as absolute spam. We report a novel annotation effort performed with the ClueWeb09 benchmark where pages were labeled as being spam, sham, or legitimate content. Significant inter-annotator agreement rates support the claim that there are sham documents that are highly ranked by a very effective retrieval approach, yet are not spam. We also present an initial study of predictors that may indicate whether a query is the target of shamming.},
  note      = {36th International ACM SIGIR Conference on Research and Development in Information Retrieval, SIGIR 2013 ; Conference date: 28-07-2013 Through 01-08-2013},
}