@InProceedings{Siegert2018_397,
  author    = {Siegert, Ingo and Shuran, Tang and Lotz, Alicia Flores},
  title     = {Acoustic Addressee-Detection -- Analysing the Impact of Age, Sex and Technical Knowledge},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2018},
  editor    = {Berton, André and Haiber, Udo and Minker, Wolfgang},
  year      = {2018},
  month     = mar,
  pages     = {113--120},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-959081-28-3},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2018_113_120.pdf},
  keywords  = {Poster},
  abstract  = {Today, in technical dialog-systems diverse solutions are implemented to
    detect if a system should react to an uttered speech command. Typically used
    solutions are push-to-talk and keywords. Unfortunately, these solutions
    constitute an unnatural interaction to overcome the problem that the system
    is not able to detect when it is addressed. Moreover, the actual preferred
    keyword method can result in confusions when the keyword has been said but
    no interaction with the system was intended by the user. Therefore,
    technical systems should be able to perform an addressee detection. Various
    aspects have already been investigated in this field of research, however
    most of them pursue a multimodal approach including textual and/or visual
    information achieving up to 93\% unweighted average recall.
    In our research, we limit ourselves to the pure acoustic information, as we
    assume that humans are talking differently to technical systems than to
    humans. Considering speakers of different age-, sex- and technical
    background-groups, we analysed how a technical system and another human
    being is being addressed on two subsets. An addressee detection system based
    on acoustics-based was utilized and it was investigated to which extent the
    different speaker groups influence the recognition rate in inter- and
    intra-group experiments. Our approach achieves competitive results of
    84.45\% to 98.06\% unweighted average recall and 88.35\% to 95.63\% F1
    score.},
}