% Conference paper in the ESSV 2023 proceedings volume; the publisher's city is stored in `address`, separate from `publisher`.
@InProceedings{Hintz2023_1174,
author = {Hintz, Jan and Wendemuth, Andreas and Siegert, Ingo},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2023},
title = {Cross-Reliability Benchmark Test for Preserving Emotional Content in Speech--Synthesis Related Datasets},
year = {2023},
editor = {Draxler, Christoph},
month = mar,
pages = {64--71},
publisher = {TUDpress},
address = {Dresden},
abstract = {Emotions play a crucial role in human-machine interaction (HMI), and their accurate representation in speech recordings is essential for creating natural and realistic affective computing components as speech emotion recognition and speech synthesis. However, evaluating the emotional content in speech is a difficult task, as there exist a vast amount of different emotional representations and there is no objective benchmark test to assess the cross-reliability of emotions in different datasets for the HMI domain. This paper evaluates the cross-reliability of emotional content using speech emotion recognition and valence-arousal-dominance prediction models. The study examines three emotional speech datasets, which were selected to represent a range of emotional content as well as different languages (English and German) and are developed in for speech synthesis task. Thereby, the paper especially focuses on the recently published Thorsten emotion dataset. The results of the conducted experiments showed that the Thorsten emotion dataset achieves state-of-the-art recognition rates on within corpus tests. The experiments also showed high cross-reliability of shared labels (happy/amused, neutral, angry) while unusual labels (drunk, drowsy, whispering) lead to higher confusion.},
isbn = {978-3-95908-303-4},
issn = {0940-6832},
keywords = {Emotion},
url = {https://www.essv.de/pdf/2023_64_71.pdf},
}