% ESSV 2022 proceedings paper by Harald Höge on features driving a θ-oscillator
% for cortical segmentation of speech into syllables.
% NOTE(review): the oscillator symbol in the title is written as θ to agree with the
% abstract (the record previously read "T-oscillator"); the article "an" is kept as
% found. The editor surname "Svensson Lundmark" is treated as a compound surname.
% Confirm both against the published PDF.
@InProceedings{Höge2022_1136,
  author    = {Höge, Harald},
  title     = {Improved features driving an θ-oscillator for cortical segmentation of speech into syllables},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2022},
  editor    = {Niebuhr, Oliver and Svensson Lundmark, Malin and Weston, Heather},
  year      = {2022},
  month     = mar,
  pages     = {9--16},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-95908-548-9},
  issn      = {0940-6832},
  keywords  = {Models},
  url       = {https://www.essv.de/pdf/2022_9_16.pdf},
  abstract  = {The paper describes a model for cortical segmentation of the auditory signal
into syllables. Segmentation is based on a θ-oscillator realized by an inter-neuronal
network gamma (PING) structure, where the position and duration of each syllable is
given by a related θ-cycle. The paper is focused on improving features, which drive the
θ-oscillator. We hypothesize that the θ-oscillator is driven by V-edge-neurons. These
neurons have been observed in the superior temporal gyrus (STG) which spike at the
maximal rise of the envelope of the auditory signal at the onset of the nucleus (vowel)
of a syllable. The paper is focused on modeling the V-edge-neurons. We hypothesize that
the V-edge-neurons have as input two kinds of CB-features [13] processed in critical
bands (CB). The first kind are edge features derived from the instances of maximal increase
of the partial loudness curve from each CB. The second kind are sustained features
derived from CB-modulation features indicating the presence of vowel-onsets. The
developed θ-oscillator is evaluated using a labeled speech database. The evaluation is
based on the correctness of the match between the position of the syllables and the
θ-cycles given by the sequences of θ-spikes emitted by the θ-oscillator. Compared to [2,3]
considerable progress in correctness has been achieved from 80\% to 90\%.},
}