% Wagner and Betz (2017): paper in the proceedings volume
% "Elektronische Sprachsignalverarbeitung 2017" (tagged "Poster" in keywords).
% Publisher name and publisher city are kept in separate fields so that
% bibliography styles can format them independently.
@InProceedings{Wagner2017_234,
  author    = {Wagner, Petra and Betz, Simon},
  title     = {Speech Synthesis Evaluation: Realizing a Social Turn},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2017},
  editor    = {Trouvain, Jürgen and Steiner, Ingmar and Möbius, Bernd},
  year      = {2017},
  month     = mar,
  pages     = {167--173},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-092-7},
  issn      = {0940-6832},
  keywords  = {Poster},
  url       = {https://www.essv.de/pdf/2017_167_173.pdf},
  abstract  = {Based on a meta-analysis of the state-of-the-art in speech synthesis evaluations,
we diagnose the following dilemma: Despite known drawbacks, evaluations
predominantly rely on small-scale laboratory tests, typically capturing MOS-based
global impressions based on isolated sentences, with the (resynthesized) human
voice serving as a gold standard. The problem with such approaches is that
synthesis quality can only reliably be estimated if presented in a contextualized
manner, e.g. as part of an application and together with its embodiment as an artificial
agent, robot or a disembodied voice. As most evaluations are carried out
in parallel to system development, and as these tend to be concerned with small
details in the developmental process, evaluations of fully fledged applications are
often neither possible nor useful. We argue that with a few modifications in standard
evaluation protocols, i.e. by introducing simple interactive scenarios and by
relying on both subjective impressionistic and behavioral or physiological measurements,
the reliability of such evaluations could be significantly improved.},
}