% Paper in the "Elektronische Sprachsignalverarbeitung 2021" proceedings, pp. 19--26; publisher and publisher city are kept in separate fields.
@InProceedings{Venkateswaran2021_1096,
author = {Venkateswaran, Siddarth and Böck, Ronald and Keßler, Thomas and Krini, Ossmane},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021},
title = {Pseudo-Labelling and Transfer Learning Based Speech Emotion Recognition},
year = {2021},
editor = {Hillmann, Stefan and Weiss, Benjamin and Michael, Thilo and Möller, Sebastian},
month = mar,
pages = {19--26},
publisher = {TUDpress},
address = {Dresden},
abstract = {This paper describes speech emotion recognition as an image classification problem using pseudo-labelling techniques, in which there is an availability of only a handful of labelled samples. Low-level acoustic features like log spectrograms and mel spectrograms, extracted from audio files, classified as different emotions were fed as RGB images to Convolutional Neural Networks (CNNs) to train the classification algorithms on. While CNNs have achieved state-of-the-art performances for image classification tasks, they however thrive on a lot of labelled data. This paper applies transfer learning by using a CNN pre-trained on a huge corpus of labelled image data, learning the distinct features on a handful of available labelled spoken data, and utilising this knowledge in iteratively generating machine-confident pseudo-labels for unlabelled acoustic data. Comparisons were made by evaluating a neural network trained using full-supervision, and that using semi-supervision with a combination of labelled and pseudo-labelled data. All the experiments were implemented on nine commonly used benchmark corpora, allowing also comparison to already published results.},
isbn = {978-3-959082-27-3},
issn = {0940-6832},
keywords = {Paralinguistik},
url = {https://www.essv.de/pdf/pdf/2021_19_26.pdf},
}