% ESSV 2020 conference paper. Names are stored in "Last, First" form, and the
% publisher's city lives in `address` rather than inside `publisher`.
@InProceedings{Hirsch2020_431,
  author    = {Hirsch, Hans-Günter and Micheel, Alexander and Gref, Michael},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020},
  title     = {Keyword Detection for the Activation of Speech Dialogue Systems},
  year      = {2020},
  editor    = {Wendemuth, Andreas and Böck, Ronald and Siegert, Ingo},
  month     = mar,
  pages     = {2--9},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {The detection and recognition of a spoken keyword is an adequate and
comfortable method to activate a speech dialogue system. A low false acceptance
rate (FAR) is needed to avoid the erroneous activation of a speech assistant and the
erroneous activation of a speech controlled device as consequence of recognizing
a legal command after the keyword. The false rejection rate (FRR) should also
be low to guarantee good user acceptance. Often, the keyword recognition has to
be realized in an embedded system with limited computational resources. Therefore,
the detection and recognition algorithm has to fulfill the requirements of a low
FAR and FRR on the one hand and the need of a low computational load on the
other hand. We designed a two stage algorithm to meet these expectations. The
first stage consists of a GMM-HMM (Gaussian Mixture Model - Hidden Markov
Model) based recognizer with one or several HMMs for the keyword and a set of
so-called filler HMMs to model speech segments that do not contain the keyword.
To reduce the FAR of the first stage, the MEL spectrum of the pretended keyword
segment is analyzed by employing a neural network. The task of the neural network
as second stage of the recognition process is either to accept or to reject the
keyword as pretended in the first stage. It turns out that the FAR of the first stage
can considerably be reduced by the second stage.},
  isbn      = {978-3-959081-93-1},
  issn      = {0940-6832},
  keywords  = {(Speech) Assistents},
  url       = {https://www.essv.de/pdf/2020_2_9.pdf},
}