% Conference paper in the "Elektronische Sprachsignalverarbeitung 2023" proceedings.
% Publisher city is stored in `address`; acronyms in the title are brace-protected
% so sentence-casing styles keep them upper-case.
@InProceedings{Howard2023_1184,
  author    = {Howard, Ian S. and Mcglashan, Julian and Fourcin, Adrian J.},
  title     = {Training a {CNN} to Estimate Voice Pathology from Connected Speech Using {EGG} to Automatically Label the Dataset for Voicing},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2023},
  editor    = {Draxler, Christoph},
  year      = {2023},
  month     = mar,
  pages     = {142--149},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-303-4},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2023_142_149.pdf},
  keywords  = {Automatic Speech Recognition},
  abstract  = {We describe a new system for estimating voice pathology directly from
the acoustic speech signal to assist in the diagnosis of pathological voice conditions
by voice specialists. Our main novel contributions are the use of Electroglottography
(EGG) in neural net training to automatically label speech acoustic signals for voicing
and the generation of running estimates of pathology with high temporal resolution
from the acoustic signal alone. These estimates can also be linked to the parts of speech
signals where voice pathology manifests itself most strongly. By operating directly on
the acoustic signal waveform without the use of any pre-processing, we avoid the use
of hand-crafted features. We trained and tested a neural network using speech datasets
with normal and pathological voicing and found that it can provide effective fine-grained
indications of pathology. Our quantitative results show that this neural network
performs well in distinguishing between speakers with normal and pathological
voice conditions, achieving a recognition rate of 91\%, which compares favorably with
results from other studies.},
}