@inproceedings{e064380a6d2c433bb1e8bc41e2e00993,
title = "Sub-sampling for multi-armed bandits",
abstract = "The stochastic multi-armed bandit problem is a popular model of the exploration/exploitation trade-off in sequential decision problems. We introduce a novel algorithm that is based on sub-sampling. Despite its simplicity, we show that the algorithm demonstrates excellent empirical performances against state-of-the-art algorithms, including Thompson sampling and KL-UCB. The algorithm is very flexible, it does need to know a set of reward distributions in advance nor the range of the rewards. It is not restricted to Bernoulli distributions and is also invariant under rescaling of the rewards. We provide a detailed experimental study comparing the algorithm to the state of the art, the main intuition that explains the striking results, and conclude with a finite-time regret analysis for this algorithm in the simplified two-arm bandit setting.",
keywords = "Multi-armed Bandits, Reinforcement Learning, Sub-sampling",
author = "Akram Baransi and Maillard, {Odalric Ambrym} and Shie Mannor",
year = "2014",
doi = "10.1007/978-3-662-44848-9_8",
language = "English",
isbn = "9783662448472",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
number = "PART 1",
pages = "115--131",
booktitle = "Machine Learning and Knowledge Discovery in Databases - European Conference, ECML PKDD 2014, Proceedings",
note = "European Conference on Machine Learning and Knowledge Discovery in Databases, ECML PKDD 2014 ; Conference date: 15-09-2014 Through 19-09-2014",
}
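
The abstract describes a sub-sampling comparison between arms but not its mechanics. Below is a minimal Python sketch of one plausible reading: the longer reward history is sub-sampled, without replacement, down to the size of the shorter one, empirical means are compared on equal sample sizes, and ties favour the less-sampled arm. The function names, the Bernoulli reward model, and the tie-breaking rule are illustrative assumptions, not the paper's exact specification.

import random


def subsample_duel(hist_a, hist_b, rng=random):
    """Choose an arm by comparing empirical means on equal-sized samples.

    The longer history is sub-sampled without replacement down to the
    length of the shorter one; a tie favours the less-sampled arm.
    Returns 0 to play arm a, 1 to play arm b. (Illustrative sketch only,
    not the authors' exact procedure.)
    """
    if len(hist_a) <= len(hist_b):
        sub_a, sub_b = hist_a, rng.sample(hist_b, len(hist_a))
    else:
        sub_a, sub_b = rng.sample(hist_a, len(hist_b)), hist_b
    mean_a = sum(sub_a) / len(sub_a)
    mean_b = sum(sub_b) / len(sub_b)
    if mean_a == mean_b:  # tie: pick the arm with the shorter history
        return 0 if len(hist_a) <= len(hist_b) else 1
    return 0 if mean_a > mean_b else 1


def run_two_armed(pulls, p_a=0.4, p_b=0.6, seed=0):
    """Tiny two-armed Bernoulli bandit loop driven by the duel above."""
    rng = random.Random(seed)
    # One forced pull per arm so both histories are non-empty.
    hists = [[float(rng.random() < p_a)], [float(rng.random() < p_b)]]
    for _ in range(pulls - 2):
        arm = subsample_duel(hists[0], hists[1], rng)
        p = p_a if arm == 0 else p_b
        hists[arm].append(float(rng.random() < p))
    return len(hists[0]), len(hists[1])


if __name__ == "__main__":
    # The better arm (p_b = 0.6) should accumulate most of the pulls.
    print(run_two_armed(1000))

Note that the decision uses only the ordering of the two empirical means, which is consistent with the abstract's remark that the approach is invariant under rescaling of the rewards and needs no prior knowledge of their range.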