% Conference paper in the "Elektronische Sprachsignalverarbeitung 2022" proceedings volume.
% Names are stored in "Last, First" form so that the double surname
% "Svensson Lundmark" is not split into given name + surname by BibTeX.
% The publisher's city lives in `address`, not inside `publisher`.
% NOTE(review): the title previously read "T-oscillator"; the theta was restored
% to agree with the abstract's terminology -- confirm against the printed paper.
% The citation key is left unchanged so existing \cite commands keep working.
@InProceedings{Höge2022_1136,
  author    = {Höge, Harald},
  title     = {Improved features driving an θ-oscillator for cortical segmentation of speech into syllables},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2022},
  year      = {2022},
  editor    = {Niebuhr, Oliver and Svensson Lundmark, Malin and Weston, Heather},
  month     = mar,
  pages     = {9--16},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {The paper describes a model for cortical segmentation of the auditory signal into syllables. Segmentation is based on a θ-oscillator realized by an inter-neuronal network gamma (PING) structure, where the position and duration of each syllable is given by a related θ-cycle. The paper is focused on improving features, which drive the θ-oscillator. We hypothesize that the θ-oscillator is driven by ‘V-edge-neurons’. These neurons have been observed in the superior temporal gyrus (STG) which spike at the maximal rise of the envelope of the auditory signal at the onset of the nucleus (vowel) of a syllable. The paper is focused to model the V-edge-neurons. We hypothesize, that the V-edge-neurons have as input two kinds of CB-features [13] processed in critical bands (CB). The first kind are edge features derived from the instances of maximal increase of the partial loudness curve from each CB. The second kind are sustained features derived from CB-modulation features indicating the presents of vowel-onsets. The developed θ-oscillator is evaluated using a labeled speech database. The evaluation is based on the correctness of the match between the position of the syllables and the θ-cycles given by the sequences of θ-spikes emitted by the θ-oscillator. Compared to [2,3] considerable progress in correctness has been achieved from 80% to 90%.},
  isbn      = {978-3-95908-548-9},
  issn      = {0940-6832},
  keywords  = {Models},
  url       = {https://www.essv.de/pdf/2022_9_16.pdf},
}