% Conference paper by Zipori and Sarne (2021), DOI 10.1145/3472307.3484673.
% Cleaned-up portal export: acronyms in the title are brace-protected so
% sentence-casing styles keep them, names use the unambiguous "Last, First"
% form, and the language is given in ASCII.
% The exported `series` field was an exact duplicate of `booktitle` and has
% been dropped so the proceedings title is not printed twice.
% NOTE(review): the venue name in `booktitle`/`note` ("User Modeling,
% Adaptation and Personalization Human-Agent Interaction") looks garbled by
% the exporter -- confirm the official proceedings title against the DOI.
@inproceedings{559bb47f995e4566b13e702897bc23f4,
  author    = {Zipori, Or and Sarne, David},
  title     = {{ML}-Based Arm Recommendation in Short-Horizon {MABs}},
  booktitle = {{HAI} 2021 - Proceedings of the 9th International User Modeling, Adaptation and Personalization Human-Agent Interaction},
  pages     = {377--381},
  year      = {2021},
  month     = nov,
  day       = {9},
  doi       = {10.1145/3472307.3484673},
  language  = {English},
  keywords  = {HAI experimental methods, Human-virtual agent interaction, Machine learning, Monte-Carlo simulation, Multi armed bandit, Recommender agents},
  abstract  = {In many settings where an agent needs to suggest or recommend a course of action to its user, the agent's goal may not fully align with the user's goal. In particular, the agent may maximize its benefit if the user chooses specific alternatives that are not necessarily the ones that maximize her own individual benefit. In this paper we study such setting in the context of providing advice in two-armed bandit problems. We explore a potential strategy for the agent aiming to influence the arm to be picked. In particular we focus on a somehow naive recommendation strategy that always recommend the preferred arm and a strategy that recommends based on various Machine Learning models that aim to guide the decision regarding when to switch to the agent's least preferred arm. Based on extensive evaluation we find that both recommendation strategies results in better performance compared to not making any recommendation, and that the naive recommendation strategy performs slightly better than the ML-based recommendations, despite using a substantial amount of training data for the latter.},
  note      = {Publisher Copyright: {\textcopyright} 2021 Owner/Author.; 9th International User Modeling, Adaptation and Personalization Human-Agent Interaction, HAI 2021 ; Conference date: 09-11-2021 Through 11-11-2021},
}