% ESSV 2025 proceedings contribution (tagged "Poster" in keywords).
@InProceedings{Marquenie2025_1258,
  author    = {Marquenie, Jan and Leonhardt, Mareile and Grawunder, Sven and Siegert, Ingo},
  title     = {Gender spectrum data from podcasts -- a proof of concept},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025},
  editor    = {Grawunder, Sven},
  year      = {2025},
  month     = mar,
  pages     = {239--246},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-803-9},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2025_239_246.pdf},
  keywords  = {Poster},
  abstract  = {Bias in speech recognition systems persists, particularly regarding
    gender identities and sexual orientations. Although recent efforts have
    diversified datasets by addressing age, language, ethnicity, and recording
    conditions, LGBTQIA+ speakers remain underrepresented. To help fill this gap,
    we investigated the feasibility of using publicly accessible podcasts
    featuring LGBTQIA+ persons to compile a corpus of 126 speakers. We propose a
    semi-automatic gathering process starting with automatic diarization of each
    episode and with successive identification of the hosts, linking metadata and
    guest information from the episode information, followed by a manual revision
    or addition of the labels. Our findings highlight podcast data as a promising
    avenue for capturing the diversity needed to mitigate bias in speech
    technology and foster more equitable voice systems.},
}