@inproceedings{a26a830f6f8445e68e72aef7573e9ccd,
title = "Eliciting User Preferences for Personalized Multi-Objective Decision Making through Comparative Feedback",
abstract = "In this work, we propose a multi-objective decision making framework that accommodates different user preferences over objectives, where preferences are learned via policy comparisons. Our model consists of a known Markov decision process with a vector-valued reward function, with each user having an unknown preference vector that expresses the relative importance of each objective. The goal is to efficiently compute a near-optimal policy for a given user. We consider two user feedback models. We first address the case where a user is provided with two policies and returns their preferred policy as feedback. We then move to a different user feedback model, where a user is instead provided with two small weighted sets of representative trajectories and selects the preferred one. In both cases, we suggest an algorithm that finds a nearly optimal policy for the user using a number of comparison queries that scales quasilinearly in the number of objectives.",
author = "Han Shao and Lee Cohen and Avrim Blum and Yishay Mansour and Aadirupa Saha and Walter, {Matthew R.}",
note = "Publisher Copyright: {\textcopyright} 2023 Neural information processing systems foundation. All rights reserved.; 37th Conference on Neural Information Processing Systems, NeurIPS 2023 ; Conference date: 10-12-2023 Through 16-12-2023",
year = "2023",
language = "الإنجليزيّة",
series = "Advances in Neural Information Processing Systems",
editor = "A. Oh and T. Neumann and A. Globerson and K. Saenko and M. Hardt and S. Levine",
booktitle = "Advances in Neural Information Processing Systems 36 - 37th Conference on Neural Information Processing Systems, NeurIPS 2023",
}