% Conference paper in the "Elektronische Sprachsignalverarbeitung 2025" proceedings (pp. 101--108).
% Publisher name and publisher city are kept in separate fields; case-sensitive
% words in the title are brace-protected so sentence-casing styles keep them intact.
@InProceedings{Ranzenberger2025_1241,
  author    = {Ranzenberger, Thomas and Baumann, Ilja and Bayerl, Sebastian P. and Wagner, Dominik and Bocklet, Tobias and Riedhammer, Korbinian},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025},
  title     = {Evaluation of recognition errors of hybrid and transformer-based {ASR} systems in {German} video lectures},
  year      = {2025},
  editor    = {Grawunder, Sven},
  month     = mar,
  pages     = {101--108},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {We analyze different errors in speech recognition systems, focusing on consecutive insertions and deletions, known as hallucinations and elisions in transformer-based end-to-end automatic speech recognition (ASR) systems. We compare errors from a TDNN-HMM, and whisper-based models on English and German spontaneous speech. Based on a human annotated subset of German lecture videos, we investigate whether these blocks of deletions affect the semantics of the utterance. Whisper performs best and preserves the meaning in 90\% of the annotated error segments even containing consecutive deletions on this subset. We analyze the word error rate and do further analysis of errors using natural language processing to detect lemmatization errors, compound word errors, and out-of-vocabulary words. We discuss possible reasons and mitigations.},
  isbn      = {978-3-95908-803-9},
  issn      = {0940-6832},
  keywords  = {Benchmarking ASR and TTS},
  url       = {https://www.essv.de/pdf/2025_101_108.pdf},
}