% ESSV 2024 proceedings paper. The publisher's city is stored in `address`,
% names use the "Last, First" form, and the acronym in the title is brace-protected.
@InProceedings{Sering2024_1220,
  author    = {Sering, Konstantin},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2024},
  title     = {Speech/Non-Speech Classification Slightly Improves Synthesis Quality in {PAULE}},
  year      = {2024},
  editor    = {Baumann, Timo},
  month     = mar,
  pages     = {173--180},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {One of the tasks PAULE [1, 2] solves is finding suitable control parameter
(cp-)trajectories for a given target acoustic. These cp-trajectories can be used
to synthesize speech with the articulatory speech synthesizer of the VocalTractLab
(VTL) [3]. If the target acoustic contains substantial microphone noise or other
background noises, occasionally PAULE optimizes not for the speech in the target,
but for these background noises. By adding a speech/non-speech classifier to
the feedback and planning-loop in PAULE this resynthesis of background noises
should be mitigated. Unfortunately, the improvements were minor, which might
be due to uninformative gradients of the classifier. The importance of informative
gradients and the use of classifiers to adapt PAULE to different tasks are explained
and discussed.},
  isbn      = {978-3-95908-325-6},
  issn      = {0940-6832},
  keywords  = {Sprachsynthese und Hörpräferenzen},
  url       = {https://www.essv.de/pdf/2024_173_180.pdf},
  doi       = {10.35096/othr/pub-7095},
}