% ESSV 2021 conference paper on open-source automatic subtitling of German lecture videos.
% Publisher name and publisher city are stored in separate fields (publisher / address).
@InProceedings{Milde2021_1109,
  author    = {Milde, Benjamin and Geislinger, Robert and Lindt, Irina and Baumann, Timo},
  title     = {Open source automatic lecture subtitling},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021},
  editor    = {Hillmann, Stefan and Weiss, Benjamin and Michael, Thilo and M{\"o}ller, Sebastian},
  year      = {2021},
  month     = mar,
  pages     = {128--135},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-959082-27-3},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2021_128_135.pdf},
  keywords  = {Automatische Spracherkennung},
  abstract  = {We present a fully automatic solution for German video subtitling, with a focus on lecture videos. We rely entirely on open source models and scripts for German ASR, automatic punctuation reconstruction and subtitle segmentation. All training scripts, 1000h of German speech training data, pre-trained models and the final subtitling program are publicly available. It can readily be integrated into lecture video platforms such as Lecture2Go. The automatically generated subtitles can also serve as a basis to make the video material more accessible (e.g. via search, keyword clouds, and the like) or for further manual revision, potentially helping in significantly speeding up manual work. A particular challenge that we observe in lectures are technical terms that are frequent in a particular lecture, but infrequent in a typical language model and that might be out of vocabulary for a general purpose ASR. We approach this challenge by extracting texts from accompanying lecture slides to adapt the language model of our TDNN-HMM based ASR system. We demonstrate the usability of the full system and its generated subtitles and evaluate on a dataset of manually transcribed lectures with an average of 26.3\% WER.},
}