% ESSV 2025 proceedings paper (pp. 69--76). Names are stored in "Last, First" form,
% the publisher city lives in `address`, and percent signs in the abstract are
% escaped so the field is safe if a style typesets it.
@InProceedings{Fietkau2025_1237,
  author    = {Fietkau, Arne-Lukas and Menezes, Jo{\~a}o and Birkholz, Peter},
  title     = {Evaluating optopalatography sensor positions for command word recognition},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025},
  editor    = {Grawunder, Sven},
  year      = {2025},
  month     = mar,
  pages     = {69--76},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {Optopalatography is an articulatory measurement technique with a broad range of applications, e.g. in speech therapy and Silent Speech Interfaces (SSI). This paper uses a custom-developed OPG device as SSI for the task of command word recognition and investigates the relevance of different sensing positions, namely at the lips, at midsagittal positions, and at lateral positions, for the recognition accuracy via an ablation study. A corpus consisting of 10 repetitions of 100 different words was recorded by 4 speakers and served as input data for a recurrent neural network. Three types of evaluations were carried out: single speaker, leave-one-speaker-out and multi-speaker. The mean accuracies obtained in the single speaker and leave-one-speaker-out evaluations were 81.25\% and 47.93\%, respectively. The multi-speaker accuracy was 90.25\% and is comparable to the state-of-the-art. The ablation study results showed that the single speaker recognition using only midsagittal sensors yielded a relative decrease in accuracy of 4.92\% in comparison to when all sensors were considered. In multi-speaker evaluation, on the other hand, sensing configurations using either lip or lateral sensors showed relative accuracy decreases of 2.49\% and 1.94\%, respectively. The more sensors are removed from the input data, the larger are the accuracy decreases, meaning all sensing positions improve recognition accuracy.},
  isbn      = {978-3-95908-803-9},
  issn      = {0940-6832},
  keywords  = {Recognition in HMI and Therapeutic Applications},
  url       = {https://www.essv.de/pdf/2025_69_76.pdf},
}