% Conference paper in the "Elektronische Sprachsignalverarbeitung 2023" proceedings.
% Names are stored in "Last, First" form; the publisher's city is kept in the
% address field; the en dash in the title is written as the ASCII ligature "--".
@InProceedings{Hintz2023_1174,
author = {Hintz, Jan and Wendemuth, Andreas and Siegert, Ingo},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2023},
title = {Cross-Reliability Benchmark Test for Preserving Emotional Content in Speech--Synthesis Related Datasets},
year = {2023},
editor = {Draxler, Christoph},
month = mar,
pages = {64--71},
publisher = {TUDpress},
address = {Dresden},
abstract = {Emotions play a crucial role in human-machine interaction (HMI), and
their accurate representation in speech recordings is essential for creating natural
and realistic affective computing components as speech emotion recognition and
speech synthesis. However, evaluating the emotional content in speech is a difficult
task, as there exist a vast amount of different emotional representations and there is
no objective benchmark test to assess the cross-reliability of emotions in different
datasets for the HMI domain. This paper evaluates the cross-reliability of emotional
content using speech emotion recognition and valence-arousal-dominance prediction
models. The study examines three emotional speech datasets, which were selected
to represent a range of emotional content as well as different languages (English
and German) and are developed in for speech synthesis task. Thereby, the paper
especially focuses on the recently published Thorsten emotion dataset.
The results of the conducted experiments showed that the Thorsten emotion dataset
achieves state-of-the-art recognition rates on within corpus tests. The experiments
also showed high cross-reliability of shared labels (happy/amused, neutral, angry)
while unusual labels (drunk, drowsy, whispering) lead to higher confusion.},
isbn = {978-3-95908-303-4},
issn = {0940-6832},
keywords = {Emotion},
url = {https://www.essv.de/pdf/2023_64_71.pdf},
}