% Conference paper in the proceedings volume "Elektronische Sprachsignalverarbeitung 2015".
@InProceedings{Hinterleitner2015_361,
  author    = {Hinterleitner, Florian and Zander, Steffen and Engelbrecht, Klaus-Peter and Möller, Sebastian},
  title     = {On the use of automatic speech recognizers for the quality and intelligibility prediction of synthetic speech},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2015},
  editor    = {Wirsching, Günther},
  year      = {2015},
  month     = mar,
  pages     = {105--111},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-959080-00-2},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/pdf/2015_105_111.pdf},
  keywords  = {Sprachsynthese},
  abstract  = {In this paper we investigate the use of an automatic speech recognizer
    (Google Speech API) for the prediction of quality and intelligibility of synthetic
    speech. For two databases of rated synthetic speech samples, we analyze the correlation
    of the word error rates (WER) obtained from the recognizer for each sample
    with ratings on 16 different attribute scales. Moderate correlations are observed
    for various quality aspects including overall impression, naturalness, and intelligibility.
    Moreover, we analyze in a third database the correlation between intelligibility
    by a human, as determined in a test with semantically unpredictable sentences,
    and the WER of the recognizer. The correlation between the humans’ and
    the recognizer’s WER over all samples is .40, and .94 if averaged by TTS system.},
}