% Conference paper in the 2021 "Elektronische Sprachsignalverarbeitung" proceedings volume.
% Names are stored in "Last, First" form; the publisher's city lives in the address field;
% the percent sign in the abstract is escaped so the field is safe if it is ever typeset.
@InProceedings{Christmann2021_1097,
author = {Christmann, Lara-Sophie},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021},
title = {Emotion Bias in Automatic Speech Recognition},
year = {2021},
editor = {Hillmann, Stefan and Weiss, Benjamin and Michael, Thilo and Möller, Sebastian},
month = mar,
pages = {27--34},
publisher = {TUDpress},
address = {Dresden},
abstract = {In this paper, we investigate the effect of emotions on an established automatic speech recognition system using five emotional speech databases covering English, German, and Italian language. We computed the word error rates and the significance of the ratio between the correctly and incorrectly recognized words per emotion category. Results showed a strong bias with an increase in word error rates of up to +73.7\% when compared to neutral speech. The correlation between emotional categories and error rates was significant at p = 0.001 for all datasets. We further tested the applicability of an existing CycleGAN for emotional speech conversion as a preprocessing step to transform speech to neutral state. The de-emotionalized speech produced by the trained networks was retested for recognition rates in comparison to emotionalized neutral speech.},
isbn = {978-3-95908-227-3},
issn = {0940-6832},
keywords = {Paralinguistik},
url = {https://www.essv.de/pdf/2021_27_34.pdf},
}