@inproceedings{a62eb814f2264b22998ea4f0eee7b798,
title = "You Can Have Your Data and Balance It Too: Towards Balanced and Efficient Multilingual Models",
abstract = "Multilingual models have been widely used for cross-lingual transfer to low-resource languages. However, the performance on these languages is hindered by their underrepresentation in the pretraining data. To alleviate this problem, we propose a novel multilingual training technique based on teacher-student knowledge distillation. In this setting, we utilize monolingual teacher models optimized for their language. We use those teachers along with balanced (sub-sampled) data to distill the teachers' knowledge into a single multilingual student. Our method outperforms standard training methods in low-resource languages and retains performance on high-resource languages.",
author = "Tomasz Limisiewicz and Dan Malkin and Gabriel Stanovsky",
note = "Publisher Copyright: {\textcopyright} 2023 Association for Computational Linguistics; 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP, SIGTYP 2023, co-located with the 17th Conference of the European Chapter of the Association for Computational Linguistics, EACL 2023; Conference date: 06-05-2023",
year = "2023",
language = "English",
series = "SIGTYP 2023 - 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP, Proceedings of the Workshop",
pages = "1--11",
editor = "Lisa Beinborn and Koustava Goswami and Saliha Muradoglu and Alexey Sorokin and Ritesh Kumar and Andreas Shcherbakov and {Edoardo M.} Ponti and Ryan Cotterell and Ekaterina Vylomova",
booktitle = "SIGTYP 2023 - 5th Workshop on Research in Computational Linguistic Typology and Multilingual NLP, Proceedings of the Workshop",
}