% Conference paper published in Proceedings of Machine Learning Research, volume 75.
% The citation key looks like an exporter-generated identifier; it is kept unchanged so
% that existing \cite commands continue to resolve.
% NOTE(review): the math in the abstract was reconstructed from a garbled export
% (lost backslashes/braces) -- confirm against the publisher's record.
@inproceedings{a8d18f1393ab48eeaf2712f3a6fa1378,
  author    = {Nachum, Ido and Shafer, Jonathan and Yehudayoff, Amir},
  title     = {A Direct Sum Result for the Information Complexity of Learning},
  booktitle = {Proceedings of the 31st Conference On Learning Theory},
  editor    = {Bubeck, S{\'e}bastien and Perchet, Vianney and Rigollet, Philippe},
  series    = {Proceedings of Machine Learning Research},
  volume    = {75},
  pages     = {1547--1568},
  publisher = {PMLR},
  year      = {2018},
  month     = jun,
  day       = {1},
  language  = {english},
  abstract  = {How many bits of information are required to PAC learn a class of hypotheses of VC dimension $d$? The mathematical setting we follow is that of Bassily et al., where the value of interest is the mutual information $I(S;A(S))$ between the input sample $S$ and the hypothesis outputted by the learning algorithm $A$. We introduce a class of functions of VC dimension $d$ over the domain $X$ with information complexity at least $\Omega\left(d \log\log \frac{|X|}{d}\right)$ bits for any consistent and proper algorithm (deterministic or random). Bassily et al. proved a similar (but quantitatively weaker) result for the case $d=1$. The above result is in fact a special case of a more general phenomenon we explore. We define the notion of \emph{information complexity} of a given class of functions $\mathcal{H}$. Intuitively, it is the minimum amount of information that an algorithm for $\mathcal{H}$ must retain about its input to ensure consistency and properness. We prove a direct sum result for information complexity in this context; roughly speaking, the information complexity sums when combining several classes.},
}