@inproceedings{a344364700c74acda655bfec119afd77,
  author    = {Aharoni, Ziv and Rattner, Gal and Permuter, Haim},
  title     = {Brief Announcement: Gradual Learning of Deep Recurrent Neural Network},
  booktitle = {Cyber Security Cryptography and Machine Learning - Second International Symposium, {CSCML} 2018, Proceedings},
  editor    = {Dinur, Itai and Dolev, Shlomi and Lodha, Sachin},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer International Publishing},
  address   = {Cham, Switzerland},
  pages     = {274--277},
  year      = {2018},
  month     = jun,
  day       = {17},
  doi       = {10.1007/978-3-319-94147-9_21},
  isbn      = {978-3-319-94146-2},
  language  = {American English},
  keywords  = {Data-processing-inequality, Machine-learning, Recurrent-neural-networks, Regularization, Training-methods},
  abstract  = {Deep Recurrent Neural Networks (RNNs) achieve state-of-the-art results in many sequence-to-sequence modeling tasks. However, deep RNNs are difficult to train and tend to suffer from overfitting. Motivated by the Data Processing Inequality (DPI) we formulate the multi-layered network as a Markov chain, introducing a training method that comprises training the network gradually and using layer-wise gradient clipping. In total, we have found that applying our methods combined with previously introduced regularization and optimization methods resulted in improvement to the state-of-the-art architectures operating in language modeling tasks.},
  note      = {Publisher Copyright: {\textcopyright} 2018, Springer International Publishing AG, part of Springer Nature.; 2nd International Symposium on Cyber Security Cryptography and Machine Learning, CSCML 2018 ; Conference date: 21-06-2018 Through 22-06-2018},
}