% Sinha, Hintz & Siegert (2024): PRISMA-based systematic review of how audio
% deepfake models are evaluated; paper in the ESSV 2024 proceedings.
% Publisher name and publisher city are kept in separate fields (publisher /
% address) so that bibliography styles can format or omit the city themselves.
@InProceedings{Sinha2024_1221,
  author    = {Sinha, Yamini and Hintz, Jan and Siegert, Ingo},
  title     = {Evaluation of Audio Deepfakes -- Systematic Review},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2024},
  editor    = {Baumann, Timo},
  publisher = {TUDpress},
  address   = {Dresden},
  year      = {2024},
  month     = mar,
  pages     = {181--187},
  doi       = {10.35096/othr/pub-7096},
  isbn      = {978-3-95908-325-6},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2024_181_187.pdf},
  keywords  = {Sprachsynthese und Hörpräferenzen},
  abstract  = {Generative models for audio are commonly used for music composition,
sound effects generation for video game development, audio restoration, voice
cloning, etc. The ease of generating indistinguishable fake audio with deep learning
poses a major threat to personal privacy, online security, and political discourse.
Evaluating the quality and realism of these synthetic utterances is crucial for mitigating
the potential for misinformation and harm. To assess this threat, this paper
conducts a systematic review, using Preferred Reporting Items for Systematic Reviews
and Meta-Analyses (PRISMA), on how these deepfake models are currently
evaluated. The analysis of 86 papers shows that the majority of the evaluation
is conducted on a machine level and highlights a research gap regarding the human
perception of deepfakes. This paper explores various methods and perceptual
measures employed in assessing audio deepfakes and evaluating their strengths,
limitations, and future directions.},
}