% Hirose et al. (2010): paper in the "Elektronische Sprachsignalverarbeitung 2010"
% proceedings volume, pp. 84--93. "F0" in the title is F-zero (fundamental frequency).
@InProceedings{Hirose2010_544,
  author    = {Hirose, Keikichi and Ochi, Keiko and Wang, Miaomiao and Matsuda, Tatsuya and Wen, Miaomiao and Minematsu, Nobuaki},
  title     = {Using {F0} Contour Generation Process Model for Improved and Flexible Control of Prosodic Features in {HMM}-based Speech Synthesis},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2010},
  editor    = {Mixdorff, Hansjörg},
  year      = {2010},
  month     = mar,
  pages     = {84--93},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {Generation process model of fundamental frequency contours known as Fujisaki's model is ideal to represent global features of prosody. It is a command response model, where the commands have clear relations with linguistic and para/non linguistic information included in the utterance. Therefore, by controlling fundamental frequency contours in the framework of the generation process model, a more flexible control of prosodic features comes possible in speech synthesis. Also, the model can be used to solve the problems of HMM-based speech synthesis, which arise from frame-by-frame treatment of fundamental frequencies. In this paper, two methods for improved control of prosodic features in HMM-based speech synthesis, and one method for flexible fundamental frequency control to realize prosodic focuses in synthetic speech, are presented. All these methods are based on the generation process model.},
  isbn      = {978-3-941298-85-9},
  issn      = {0940-6832},
  keywords  = {Prosody and Affect},
  url       = {https://www.essv.de/pdf/2010_84_93.pdf},
}