@inproceedings{f73c81f0bf2e479e97fab3cd81013415,
  title     = {Speech Emotion Recognition Using Auditory Spectrogram and Cepstral Features},
  author    = {Zhao, Shujie and Yang, Yan and Cohen, Israel and Zhang, Lijun},
  booktitle = {29th European Signal Processing Conference, EUSIPCO 2021 - Proceedings},
  series    = {European Signal Processing Conference},
  pages     = {136--140},
  year      = {2021},
  month     = aug,
  doi       = {10.23919/EUSIPCO54536.2021.9616144},
  language  = {English},
  keywords  = {Emotion recognition, Feature extraction, Machine learning, Noise, Pattern recognition, Speech signals},
  abstract  = {A systematic comparison on the impact of environmental noises on key acoustic features is critical in order to transfer speech emotion recognition (SER) systems into real world applications. In this study, we investigate the noise-tolerance of different acoustic features in distinguishing various emotions by comparing the SER classification performance on clean speech signals and noisy speech signals. We extract the spectrum and cepstral parameters based on human auditory characteristics and develop machine learning algorithms to classify four types of emotions using these features. Experimental results across the clean and noisy data show that compared to cepstral features, the auditory spectrogram-based features can achieve higher recognition accuracy for low signal-to-noise ratios (SNRs), but lower accuracy for high SNRs. Gammatone filter cepstral coefficients (GFCCs) outperformed all the extracted features on the Berlin database of emotional speech (EmoDB), under all four kinds of tested noise conditions. These results show compensation relationships between auditory spectrogram-based features and cepstral features for SER with better noise robustness in real-world applications.},
  note      = {Publisher Copyright: {\textcopyright} 2021 European Signal Processing Conference. All rights reserved.; 29th European Signal Processing Conference, EUSIPCO 2021 ; Conference date: 23-08-2021 Through 27-08-2021},
}