@inproceedings{48bf3433feaf41baa2912a5eb363faaa,
title = "Sign Based Derivative Filtering for Stochastic Gradient Descent",
abstract = "We study the performance of stochastic gradient descent (SGD) in deep neural network (DNN) models. We show that during a single training epoch the signs of the partial derivatives of the loss with respect to a single parameter are distributed almost uniformly over the minibatches. We propose an optimization routine in which we maintain a moving-average history of the sign of each derivative. This history is used to classify new derivatives as ``exploratory'' if they disagree with the sign of the history and as ``exploiting'' if they agree with it. Each derivative is weighted according to this classification, providing control over exploration and exploitation. The proposed approach leads to models with higher accuracy, as we demonstrate through a series of experiments.",
keywords = "Deep learning, Gradients, Neural networks, Optimization",
author = "Konstantin Berestizshevsky and Guy Even",
note = "Publisher Copyright: {\textcopyright} 2019, Springer Nature Switzerland AG.; 28th International Conference on Artificial Neural Networks, ICANN 2019; Conference date: 17-09-2019 through 19-09-2019",
year = "2019",
doi = "10.1007/978-3-030-30484-3_18",
language = "English",
isbn = "9783030304836",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
pages = "208--219",
editor = "Tetko, {Igor V.} and Pavel Karpov and Fabian Theis and Vera Kurkov{\'a}",
booktitle = "Artificial Neural Networks and Machine Learning – ICANN 2019",
}
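
The abstract describes the filtering rule concretely enough to illustrate. Below is a minimal PyTorch sketch of one such update step, assuming an exponential moving average of gradient signs; the decay rate beta and the weights w_exploit and w_explore are hypothetical placeholders, not the paper's exact rule or values.

import torch

def sign_filtered_step(params, grads, histories, lr=0.01, beta=0.9,
                       w_exploit=1.0, w_explore=0.1):
    # One SGD-style step: each derivative is weighted by whether its sign
    # agrees ("exploiting") or disagrees ("exploratory") with a per-parameter
    # moving-average sign history. Hyperparameter values here are illustrative.
    for p, g, h in zip(params, grads, histories):
        h.mul_(beta).add_(torch.sign(g), alpha=1.0 - beta)    # update sign history
        agree = torch.sign(h) == torch.sign(g)                # signs match -> exploiting
        w = torch.where(agree,
                        torch.full_like(g, w_exploit),        # keep agreeing derivatives
                        torch.full_like(g, w_explore))        # damp disagreeing ones
        p.sub_(lr * w * g)                                    # weighted gradient step

In this sketch, histories would be zero-initialized tensors shaped like each parameter and carried across minibatches within an epoch.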