% Conference paper by Dalal, Szorenyi, Thoppe and Mannor in the AAAI 2018 proceedings.
% Cleanup notes for this entry (citation key and entry type are unchanged):
%   - language: normalised to English; the exported value was the Arabic word
%     for "English", which put non-ASCII bytes into an otherwise ASCII entry.
%   - series: dropped, because it repeated booktitle verbatim and is meant for
%     a publication series, not the conference name.
%   - title: TD(0) is brace-protected so sentence-casing styles keep its capitals.
%   - author: written in "Last, First" form; accents use BibTeX special-character
%     groups so sorting and labels treat each accented letter as one character.
@inproceedings{3d446b16a77b46c18f8192cd5d765939,
  author    = {Dalal, Gal and Sz{\"o}r{\'e}nyi, Bal{\'a}zs and Thoppe, Gugan and Mannor, Shie},
  title     = {Finite sample analyses for {TD(0)} with function approximation},
  booktitle = {32nd AAAI Conference on Artificial Intelligence, AAAI 2018},
  pages     = {6144--6160},
  year      = {2018},
  language  = {English},
  note      = {Publisher Copyright: Copyright {\textcopyright} 2018, Association for the Advancement of Artificial Intelligence (www.aaai.org). All rights reserved.; 32nd AAAI Conference on Artificial Intelligence, AAAI 2018 ; Conference date: 02-02-2018 Through 07-02-2018},
  abstract  = {TD(0) is one of the most commonly used algorithms in reinforcement learning. Despite this, there is no existing finite sample analysis for TD(0) with function approximation, even for the linear case. Our work is the first to provide such results. Works that managed to obtain convergence rates for online Temporal Difference (TD) methods analyzed somewhat modified versions of them that include projections and stepsize dependent on unknown problem parameters. Our analysis obviates these artificial alterations by exploiting strong properties of TD(0). We provide convergence rates both in expectation and with high-probability. Both are based on relatively unknown, recently developed stochastic approximation techniques.},
}