% Conference paper (ICML 2021, Proceedings of Machine Learning Research series).
% The citation key is an opaque hash-like identifier; it is kept unchanged so
% that existing \cite commands elsewhere continue to resolve.
% Authors are stored in "Last, First" form, and the language field holds a plain
% ASCII language identifier so the entry works with both classic BibTeX and Biber.
@inproceedings{28535ca0c0cd4c5fa458b9e6092cfb31,
  author    = {Lancewicki, Tal and Segal, Shahar and Koren, Tomer and Mansour, Yishay},
  title     = {Stochastic Multi-Armed Bandits with Unrestricted Delay Distributions},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning, {ICML} 2021},
  series    = {Proceedings of Machine Learning Research},
  publisher = {ML Research Press},
  year      = {2021},
  pages     = {5969--5978},
  language  = {english},
  note      = {Copyright {\textcopyright} 2021 by the author(s). 38th International Conference on Machine Learning, ICML 2021; conference dates: 18--24 July 2021},
  abstract  = {We study the stochastic Multi-Armed Bandit (MAB) problem with random delays in the feedback received by the algorithm. We consider two settings: the reward-dependent delay setting, where realized delays may depend on the stochastic rewards, and the reward-independent delay setting. Our main contribution is algorithms that achieve near-optimal regret in each of the settings, with an additional additive dependence on the quantiles of the delay distribution. Our results do not make any assumptions on the delay distributions: in particular, we do not assume they come from any parametric family of distributions and allow for unbounded support and expectation; we further allow for infinite delays where the algorithm might occasionally not observe any feedback.},
}