% Conference paper, pages 173--180 of the ESSV 2024 proceedings volume.
% The publisher's city is stored in `address`, separate from `publisher`,
% and the acronym in the title is brace-protected against style recasing.
% The abstract is kept verbatim as exported.
@InProceedings{Sering2024_1220,
  author    = {Sering, Konstantin},
  title     = {Speech/Non-Speech Classification Slightly Improves Synthesis Quality in {PAULE}},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2024},
  editor    = {Baumann, Timo},
  year      = {2024},
  month     = mar,
  pages     = {173--180},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-325-6},
  issn      = {0940-6832},
  doi       = {10.35096/othr/pub-7095},
  url       = {https://www.essv.de/pdf/2024_173_180.pdf},
  keywords  = {Sprachsynthese und Hörpräferenzen},
  abstract  = {One of the tasks PAULE[1, 2] solves is finding suitable control parameter (cp-)trajectories for a given target acoustic. These cp-trajectories can be used to synthesize speech with the articulatory speech synthesizer of the VocalTractLab (VTL) [3]. If the target acoustic contains substantial microphone noise or other background noises, occasionally PAULE optimizes not for the speech in the target, but for this background noises. By adding a speech/non-speech classifier to the feedback and planning-loop in PAULE this resynthesis of background noises should be mitigated. Unfortunately, the improvements were minor, which might be due to uninformative gradients of the classifier. The importance of informative gradients and the use classifiers to adapt PAULE to different tasks are explained and discussed.},
}