% Betz, Wagner and Schlangen (2015): paper in the "Elektronische Sprachsignalverarbeitung 2015" proceedings volume, pp. 128--134.
@InProceedings{Betz2015_364,
author = {Betz, Simon and Wagner, Petra and Schlangen, David},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2015},
title = {Modular Synthesis of Disfluencies for Conversational Speech Systems},
year = {2015},
editor = {Wirsching, G{\"u}nther},
month = mar,
pages = {128--134},
publisher = {TUDpress},
address = {Dresden},
abstract = {It has been shown that dialogue systems benefit from incremental architectures to produce fast responses and to interact with the interlocutor in a more human-like way. The advantage of quick responses yields the disadvantage of running out of things to say for a while. In such occasions, humans tend to produce disfluencies as a listener-oriented strategy to signal the ongoing production process and to buy time for finalizing the turn. Introducing disfluency capabilities into a speech synthesis module of a dialogue system may therefore be a straightforward strategy towards conversational speech systems. Disfluencies are a very complex matter, they can take various chaining and nested forms in human communication. We do not attempt to equip our system with the full range of possible disfluent time-buying strategies found in human interaction. For a first perceptual evaluation of the most suitable synthetic disfluency strategy to be integrated into the dialogue system, we focus on three structural factors that are able to cover a wide range of attested disfluency patterns: lengthening, word cutoffs and pauses. This leads to several different configurations a disfluent sentence can take. Sentences from a spontaneous speech corpus were resynthesized in all possible configurations using Mary TTS. In order to identify euphone configurations, these stimuli were then presented to test subjects in a perception test.},
isbn = {978-3-959080-00-2},
issn = {0940-6832},
keywords = {Sprachsynthese},
url = {https://www.essv.de/pdf/2015_128_134.pdf},
}