@inproceedings{67187fd9cedd432aa259d994fc395949,
title = "Diagonal State Spaces are as Effective as Structured State Spaces",
abstract = "Modeling long range dependencies in sequential data is a fundamental step towards attaining human-level performance in many modalities such as text, vision, audio and video. While attention-based models are a popular and effective choice in modeling short-range interactions, their performance on tasks requiring long range reasoning has been largely inadequate. In an exciting result, Gu et al. (2022) proposed the Structured State Space (S4) architecture delivering large gains over state-of-the-art models on several long-range tasks across various modalities. The core proposition of S4 is the parameterization of state matrices via a diagonal plus low rank structure, allowing efficient computation. In this work, we show that one can match the performance of S4 even without the low rank correction and thus assuming the state matrices to be diagonal. Our Diagonal State Space (DSS) model matches the performance of S4 on Long Range Arena tasks, speech classification on Speech Commands dataset, while being conceptually simpler and straightforward to implement.",
author = "Ankit Gupta and Albert Gu and Jonathan Berant",
note = "Publisher Copyright: {\textcopyright} 2022 Neural information processing systems foundation. All rights reserved.; 36th Conference on Neural Information Processing Systems, NeurIPS 2022 ; Conference date: 28-11-2022 Through 09-12-2022",
year = "2022",
language = "الإنجليزيّة",
series = "Advances in Neural Information Processing Systems",
publisher = "Neural information processing systems foundation",
editor = "S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh",
booktitle = "Advances in Neural Information Processing Systems 35 - 36th Conference on Neural Information Processing Systems, NeurIPS 2022",
address = "الولايات المتّحدة",
}