% Harnisch and Hillmann (2024): conference paper in the ESSV 2024 proceedings
% (tagged "Poster" in keywords). Acronyms in the title are brace-protected so
% sentence-casing styles keep them upper-case; the publisher's city lives in
% the address field rather than inside the publisher name.
@InProceedings{Harnisch2024_1225,
  author    = {Harnisch, Philipp L. and Hillmann, Stefan},
  title     = {Empirical Evaluation of {ASR} and {NLU} in a Multimodal Dialogue System for Survey Answering},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2024},
  editor    = {Baumann, Timo},
  year      = {2024},
  month     = mar,
  pages     = {211--218},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-325-6},
  issn      = {0940-6832},
  doi       = {10.35096/othr/pub-7100},
  url       = {https://www.essv.de/pdf/2024_211_218.pdf},
  keywords  = {Poster},
  abstract  = {PROM surveys, used to measure the effect of rehabilitation treatments,
are typically filled out on paper, and often suffer from low response rates. Replacing
it with a multimodal survey system, supporting touch and speech interaction,
could lead to lower hurdles and therefore more data quantity. To do this, it requires
task-specific training samples for the Automatic Speech Recognition (ASR) and
Natural Language Understanding (NLU) to classify spoken answers into one of the
standardized PROM answer options.
Due to the lack of training data for medical PROM surveys, we created augmented
text samples with each answer option description, combined with different templates.
To improve training capabilities, introduce a proper test set, and evaluate
the ASR, we also collected 1,797 real voice samples within an empirical study.
Further, we incorporate the contextual knowledge of the current question into our
NLU architecture by implementing one classifier for every question scale.
Our results reveal that training with empirical data leads to better results than augmented
data from templates and original answer option descriptions. Because of
participant mislabeling of 33% due to the ambiguity of the task, we receive overall
low NLU performances with up to 51.1% accuracy, and rank-1-accuracy up to
79.3%. We also find that our implementation of many scale-specific NLU classifiers
significantly outperforms one NLU classifier for all labels, that incorporates
the same contextual knowledge after the prediction, by 8 percent points.},
}