% Conference paper in the "Elektronische Sprachsignalverarbeitung 2025" proceedings (pp. 146--154).
@InProceedings{Liu2025_1247,
  author    = {Liu, Huiyu and Srinivasagan, Gokul and Georges, Munir},
  title     = {Real-time audio transcriber for language barrier-free classrooms},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025},
  editor    = {Grawunder, Sven},
  year      = {2025},
  month     = mar,
  pages     = {146--154},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-803-9},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2025_146_154.pdf},
  keywords  = {Multilingual Speech and Language Data Processing},
  abstract  = {Language barriers in educational environments pose significant challenges to international students and educators, particularly in real-time lecture transcription. While large-scale speech models like whisper demonstrate impressive capabilities, their deployment in resource-constrained settings remains challenging. This study develops a lightweight solution for real-time speech transcription and German-English translation through knowledge distillation and model compression techniques. By leveraging the whisper model to generate pseudo-labels and exploring various distillation strategies, we created compact models that maintain high performance while reducing computational demands. Our experiments show that a compressed model with approximately 40 million parameters achieves competitive word error rate (WER) and BLEU scores in both transcription and translation tasks. The resulting system, implemented using whisper.cpp, achieves real-time performance with a real-time factor (RTF) below 0.5 in CPU-only environments, effectively mitigating language barriers in classroom settings.},
}