% Paper in the "Elektronische Sprachsignalverarbeitung 2020" proceedings volume.
% The publisher's city is stored in `address`, separate from the publisher name.
@InProceedings{Howard2020_436,
  author    = {Howard, Ian S.},
  title     = {Speech Fundamental Period Estimation using a Neural Network},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020},
  editor    = {Wendemuth, Andreas and B{\"o}ck, Ronald and Siegert, Ingo},
  year      = {2020},
  month     = mar,
  pages     = {44--51},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {Here we extend previous work for the estimation of the time of excitation
    (Tx) from the speech signal using a shallow neural network. We make use of a dataset
    that consists of the simultaneously recorded speech and Laryngograph signals from
    drama students speaking a phonetically balanced passage. We first use the
    Laryngograph signal to estimate the location of vocal fold closures as a function of
    time. Then, by considering the problem as a supervised learning task, we train a multilayer
    perceptron to map between raw speech samples, selected using a sliding input
    window, to a single output target sample that represents the presence or absence of an
    excitation point. We present result of operation across several male speakers and also
    demonstrate that it is possible to reconstruct the Laryngograph directly from the speech
    signal.},
  isbn      = {978-3-959081-93-1},
  issn      = {0940-6832},
  keywords  = {Speech Synthesis},
  url       = {https://www.essv.de/pdf/2020_44_51.pdf},
}