% Paper in the ESSV 2018 proceedings volume (pp. 113--120); tagged "Poster" in keywords.
% Abstract text has been de-hyphenated (PDF line-break residue) and percent signs escaped.
@InProceedings{Siegert2018_397,
  author    = {Ingo Siegert and Tang Shuran and Alicia Flores Lotz},
  title     = {Acoustic Addressee-Detection -- Analysing the Impact of Age, Sex and Technical Knowledge},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2018},
  editor    = {Andr{\'e} Berton and Udo Haiber and Wolfgang Minker},
  year      = {2018},
  month     = mar,
  pages     = {113--120},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {Today, in technical dialog-systems diverse solutions are implemented to detect if a system should react to an uttered speech command. Typically used solutions are push-to-talk and keywords. Unfortunately, these solutions constitute an unnatural interaction to overcome the problem that the system is not able to detect when it is addressed. Moreover, the actual preferred keyword method can result in confusions when the keyword has been said but no interaction with the system was intended by the user. Therefore, technical systems should be able to perform an addressee detection. Various aspects have already been investigated in this field of research, however most of them pursue a multimodal approach including textual and/or visual information achieving up to 93\% unweighted average recall. In our research, we limit ourselves to the pure acoustic information, as we assume that humans are talking differently to technical systems than to humans. Considering speakers of different age-, sex- and technical background-groups, we analysed how a technical system and another human being is being addressed on two subsets. An addressee detection system based on acoustics-based was utilized and it was investigated to which extent the different speaker groups influence the recognition rate in inter- and intra-group experiments. Our approach achieves competitive results of 84.45\% to 98.06\% unweighted average recall and 88.35\% to 95.63\% F1 score.},
  isbn      = {978-3-95908-128-3},
  issn      = {0940-6832},
  keywords  = {Poster},
  url       = {https://www.essv.de/pdf/2018_113_120.pdf},
}