% ESSV 2015 proceedings paper on synthesizing disfluencies (lengthening, word cutoffs, pauses) for dialogue systems.
@InProceedings{Betz2015_364,
author = {Betz, Simon and Wagner, Petra and Schlangen, David},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2015},
title = {Modular Synthesis of Disfluencies for Conversational Speech Systems},
year = {2015},
editor = {Wirsching, Günther},
month = mar,
pages = {128--134},
publisher = {TUDpress},
address = {Dresden},
abstract = {It has been shown that dialogue systems benefit from incremental
architectures to produce fast responses and to interact with the interlocutor in a
more human-like way. The advantage of quick responses yields the disadvantage of
running out of things to say for a while. In such occasions, humans tend to produce
disfluencies as a listener-oriented strategy to signal the ongoing production process
and to buy time for finalizing the turn. Introducing disfluency capabilities into a
speech synthesis module of a dialogue system may therefore be a straightforward
strategy towards conversational speech systems.
Disfluencies are a very complex matter, they can take various chaining and nested
forms in human communication. We do not attempt to equip our system with the
full range of possible disfluent time-buying strategies found in human interaction.
For a first perceptual evaluation of the most suitable synthetic disfluency strategy to
be integrated into the dialogue system, we focus on three structural factors that are
able to cover a wide range of attested disfluency patterns: lengthening, word cutoffs
and pauses. This leads to several different configurations a disfluent sentence can
take. Sentences from a spontaneous speech corpus were resynthesized in all possible
configurations using Mary TTS. In order to identify euphone configurations,
these stimuli were then presented to test subjects in a perception test.},
isbn = {978-3-959080-00-2},
issn = {0940-6832},
keywords = {Sprachsynthese},
url = {https://www.essv.de/pdf/2015_128_134.pdf},
}