% Conference paper (pp. 293--300 of the proceedings volume named in booktitle) on integrating the Kaldi speech recognizer into an automotive speech dialog system.
@InProceedings{Ranzenberger2018_420,
author = {Ranzenberger, Thomas and Hacker, Christian and Gallwitz, Florian},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2018},
title = {Integration of a {Kaldi} Speech Recognizer into a Speech Dialog System for Automotive Infotainment Applications},
year = {2018},
editor = {Berton, Andr{\'e} and Haiber, Udo and Minker, Wolfgang},
month = mar,
pages = {293--300},
publisher = {TUDpress},
address = {Dresden},
abstract = {In this paper we present an evaluation of the Kaldi speech recognizer in an automotive context. We integrate Kaldi into an existing software tool which is used to specify human-machine interfaces including speech dialogs for automotive and non-automotive domains. This enables linguists and other researchers to use their own Kaldi models for user studies or experiments on voice enabled interfaces. We train our own Kaldi models using a freely available corpus based on audiobooks. Further, we propose an algorithm to map utterances returned by the Kaldi recognizer onto intents of the speech dialog system. We evaluate the provided algorithm with our trained Kaldi model in the extended software tool. The used Kaldi model is based on time delayed neuronal networks and has a word error rate of 5.9\% and a sentence error rate of 52.5\% on the test data of the corpus. 22 participants spoke 50 random sentences of a self created corpus of example sentences. The words of the collected corpus are a subset of the words which are used for the language model of the Kaldi recognizer. The applied method is able to reduce the sentence error rate from 34\% to 3\% on this corpus. The Kaldi speech recognizer is suitable for automotive command and control scenarios. The recognized intents are detected robustly with the used algorithm and proposed modeling techniques.},
isbn = {978-3-95908-128-3},
issn = {0940-6832},
keywords = {Demo Session},
url = {https://www.essv.de/pdf/2018_293_300.pdf},
}