% Harnisch and Hillmann, paper in the ESSV 2024 proceedings (pp. 211--218), tagged "Poster".
% Publisher city is kept in `address`; acronyms in the title are brace-protected against style recasing.
@InProceedings{Harnisch2024_1225,
author = {Harnisch, Philipp L. and Hillmann, Stefan},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2024},
title = {Empirical Evaluation of {ASR} and {NLU} in a Multimodal Dialogue System for Survey Answering},
year = {2024},
editor = {Baumann, Timo},
month = mar,
pages = {211--218},
publisher = {TUDpress},
address = {Dresden},
abstract = {PROM surveys, used to measure the effect of rehabilitation treatments, are typically filled out on paper, and often suffer from low response rates. Replacing it with a multimodal survey system, supporting touch and speech interaction, could lead to lower hurdles and therefore more data quantity. To do this, it requires task-specific training samples for the Automatic Speech Recognition (ASR) and Natural Language Understanding (NLU) to classify spoken answers into one of the standardized PROM answer options. Due to the lack of training data for medical PROM surveys, we created augmented text samples with each answer option description, combined with different templates. To improve training capabilities, introduce a proper test set, and evaluate the ASR, we also collected 1,797 real voice samples within an empirical study. Further, we incorporate the contextual knowledge of the current question into our NLU architecture by implementing one classifier for every question scale. Our results reveal that training with empirical data leads to better results than augmented data from templates and original answer option descriptions. Because of participant mislabeling of 33\% due to the ambiguity of the task, we receive overall low NLU performances with up to 51.1\% accuracy, and rank-1-accuracy up to 79.3\%. We also find that our implementation of many scale-specific NLU classifiers significantly outperforms one NLU classifier for all labels, that incorporates the same contextual knowledge after the prediction, by 8 percent points.},
isbn = {978-3-95908-325-6},
issn = {0940-6832},
keywords = {Poster},
url = {https://www.essv.de/pdf/2024_211_218.pdf},
doi = {10.35096/othr/pub-7100},
}