@inproceedings{675f4e87ebd8459b874b5ff4fa1e5af4,
title = "Dataset and Evaluation of Automatic Speech Recognition for Multi-lingual Intent Recognition on Social Robots",
abstract = "While Automatic Speech Recognition (ASR) systems excel in controlled environments, challenges arise in robot-specific setups due to unique microphone requirements and added noise sources. In this paper, we create a dataset of initiating conversations with brief exchanges in 5 European languages, and we systematically evaluate current state-of-art ASR systems (Vosk, OpenWhisper, Google Speech and NVidia Riva). Besides standard metrics, we also look at two critical downstream tasks for human-robot verbal interaction: intent recognition rate and entity extraction, using the open-source Rasa chatbot. Overall, we found that open-source solutions as Vosk performs competitively with closed-source solutions while running on the edge, on a low compute budget (CPU only).",
keywords = "Assistive Robotics, Audio Dataset, Automatic Speech Recognition, Human-Robot Interaction",
author = "Antonio Andriella and Raquel Ros and Yoav Ellinson and Sharon Gannot and S{\'e}verin Lemaignan",
note = "Publisher Copyright: {\textcopyright} 2024 IEEE Computer Society. All rights reserved.; 19th Annual ACM/IEEE International Conference on Human-Robot Interaction, HRI 2024 ; Conference date: 11-03-2024 Through 15-03-2024",
year = "2024",
month = mar,
day = "11",
doi = "https://doi.org/10.1145/3610977.3637473",
language = "الإنجليزيّة",
series = "ACM/IEEE International Conference on Human-Robot Interaction",
publisher = "IEEE Computer Society",
pages = "865--869",
booktitle = "HRI 2024 - Proceedings of the 2024 ACM/IEEE International Conference on Human-Robot Interaction",
address = "الولايات المتّحدة",
}