@inproceedings{Höge2018_389,
  author    = {Harald Höge},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2018},
  title     = {Using Elementary Articulatory Gestures as Phonetic Units for Speech Recognition},
  year      = {2018},
  editor    = {André Berton and Udo Haiber and Wolfgang Minker},
  month     = mar,
  pages     = {54--61},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {Perception and production of speech is linked to a set of basic articulatory
gestures related to an articulatory code. Due to the immature methods in measuring
cortical activities, the detailed functionalities of these gestures have not been deciphered
yet. I hypothesize a set of gestures -- elementary articulatory gestures (EAGs)
-- mimicking human articulatory gestures. The concept of EAGs is based on the hypothesis
that the gestures are generated by a two level control hierarchy. The upper
level is related to `broad gestures', the lower to `narrow gestures'. The control of
broad gestures is used for both, for speech perception and speech production, whereas
the concept of narrow gestures is only relevant for speech production. The temporal
control of broad gestures is triggered by the quasi rhythmic opening and closing
of the mandibular [6] steered by entrained {$\Theta$}-oscillations [7]. I call one {$\Theta$}-cycle a
`cortical syllable', which is defined by 3 states: an opening, middle and a closing
state. Each state is related to a set of EAGs, where {$\gamma$}-oscillations embedded in the {$\Theta$}-oscillations steer their temporal dynamics. In speech perception, the EAGs are perceived
using the temporal constraints given by the upper level of control hierarchy.
A feasibility study is presented, where from a phonetic labeled German speech database
a set of 249 opening, 40 middle and 233 closing EAGs are extracted. Using a
model mimicking human perception an average classification EAG error rate of
48.6\% is achieved.},
  isbn      = {978-3-959081-28-3},
  issn      = {0940-6832},
  keywords  = {Signal Processing},
  url       = {https://www.essv.de/pdf/2018_54_61.pdf},
  internal-note = {NOTE(review): citation key contains non-ASCII "ö" -- fine for Biber,
unsafe for classic BibTeX; key kept unchanged to avoid breaking existing \cite commands.
The original abstract's U+0264 was interpreted as gamma-oscillations -- confirm against
the published paper. publisher/address split out from "TUDpress, Dresden".},
}