@inproceedings{408363c7fd5d4969b65ce9b0e0473cdc,
title = "Energy Regularized RNNS for solving non-stationary Bandit problems",
abstract = "We consider a Multi-Armed Bandit problem in which the re- wards are non-stationary and are dependent on past actions and potentially on past contexts. At the heart of our method, we employ a recurrent neural network, which models these sequences. In order to balance between exploration and exploitation, we present an energy minimization term that pre- vents the neural network from becoming too confident in support of a certain action. This term provably limits the gap between the maximal and minimal probabilities assigned by the network. In a diverse set of experiments, we demonstrate that our method is at least as effective as methods suggested to solve the sub-problem of Rotting Bandits, and can solve intuitive extensions of various benchmark problems. We share our implementation at https://github.com/rotmanmi/Energy-Regularized-RNN.",
author = "Michael Rotman and Lior Wolf",
note = "Publisher Copyright: {\textcopyright} 2023 IEEE.; 48th IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2023 ; Conference date: 04-06-2023 Through 10-06-2023",
year = "2023",
doi = "10.1109/ICASSP49357.2023.10095643",
language = "الإنجليزيّة",
series = "ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
booktitle = "ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing, Proceedings",
address = "الولايات المتّحدة",
}