% Paper in the "Elektronische Sprachsignalverarbeitung 2021" proceedings volume.
% The publisher's city is stored in `address`, separate from `publisher`;
% names use the unambiguous "Last, First" form with LaTeX-escaped accents.
@InProceedings{Sasse2021_1104,
  author    = {Sasse, Mino Lee and Schaffer, Stefan and Ru{\ss}, Aaron},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021},
  title     = {Automatic-Subtitling: Comparison on the Performance of Forced Alignment and Automatic Speech Recognition},
  year      = {2021},
  editor    = {Hillmann, Stefan and Weiss, Benjamin and Michael, Thilo and M{\"o}ller, Sebastian},
  month     = mar,
  pages     = {87--94},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {This work is focusing on the automatic generation of subtitles using different tools that can be categorized as Forced Aligners (FAs) or Automatic Speech Recognizers (ASRs). A comparison of the performance of FA and ASR for the task of generating same-language subtitles was conducted. The prime motivation was a previous task, which was the extraction of sentence-utterances in different audio files using word-timestamps. Three different tools were used for this work: aeneas [1] which is an FA, Cerence [2], which is an ASR and Sonix [3], which is also an ASR. We conducted a technical evaluation and a subjective evaluation based on a case study. In this study people were presented with different stimuli, each stimulus using generated subtitles based on the time-information given by the different tools mentioned above. The resulting data of a case study confirmed a rise in performance of Cerence compared to aeneas.},
  isbn      = {978-3-959082-27-3},
  issn      = {0940-6832},
  keywords  = {Postersession 1},
  url       = {https://www.essv.de/pdf/2021_87_94.pdf},
}