% ESSV 2025 conference paper (Boddu, De Lima Luiz, Georges).
% Names are stored in "Last, First" form so BibTeX does not have to guess the
% surname boundary. NOTE(review): the family name of the second author is
% assumed to be "De Lima Luiz" -- confirm with the author or the paper.
% The publisher's city is kept in `address`, separate from `publisher`.
% Percent signs in the abstract are escaped so the field is safe to typeset.
@InProceedings{Boddu2025_1240,
  author    = {Boddu, Raviteja and De Lima Luiz, Anderson and Georges, Munir},
  title     = {Significance scoring for summarizing lecture recordings: a multi-modal perspective},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025},
  editor    = {Grawunder, Sven},
  year      = {2025},
  month     = mar,
  pages     = {93--100},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-803-9},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2025_93_100.pdf},
  keywords  = {Benchmarking ASR and TTS},
  abstract  = {Summarizing lecture recordings is essential for enhancing
               knowledge dissemination and improving accessibility in
               education. Traditional text-based approaches often neglect
               auditory and contextual cues vital for understanding spoken
               interactions. This study builds on advancements in educational
               AI platforms for lecture recording analysis [1] [2] [3] and
               adapts a previously validated multi-modal framework [4],
               originally designed for dialogue summarization, to the
               educational domain. Using a tailored dataset enriched with
               annotations for relevance, transcription accuracy, and visual
               aids, we evaluate the contributions of text and audio
               modalities in generating tailored summaries. Previous results
               on benchmark datasets, such as AMI [5] and ICSI [6],
               demonstrated the framework's effectiveness, with audio
               achieving higher mean significance scores 94.00\% and 96.06\%
               compared to text 89.22\% and 90.55\%. New evaluations on our
               tailored datasets for English and German lectures reveal audio
               mean significance scores of 98.62\% and 94.80\%, respectively,
               outperforming text scores of 95.26\% and 92.66\%. These results
               showcase the framework's adaptability and its potential to
               transform educational tools by addressing diverse learning
               needs.},
}