% Conference paper in the "Elektronische Sprachsignalverarbeitung 2008" proceedings volume.
% NOTE(review): the citation key contains a non-ASCII character; it is kept unchanged so
% existing \cite commands keep working, but it requires a UTF-8 aware toolchain (e.g. Biber).
@InProceedings{Stüker2008_593,
  author    = {Stüker, S.},
  title     = {Multilingual Acoustic Features for Porting Speech Recognition Systems to New Languages},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2008},
  editor    = {Lacroix, Arild},
  year      = {2008},
  month     = mar,
  pages     = {141--148},
  publisher = {TUDpress},
  address   = {Dresden},
  isbn      = {978-3-940046-90-1},
  issn      = {0940-6832},
  url       = {https://www.essv.de/pdf/2008_141_148.pdf},
  keywords  = {Spracherkennung},
  abstract  = {Linguists estimate the number of currently existing languages to be
between 5,000 and 7,000. The fifteenth edition of the Ethnologue lists 7,299 languages. Only for a small fraction of these languages automatic speech recognition
(ASR) systems have been developed so far. Languages addressed are mainly those
with either a large population of speakers, with sufficient economic funding, or with
high political impact.
In order to be able to cover as many languages as possible, techniques have to be
developed in order to rapidly port speech recognition systems to new languages
in a cost efficient way. The techniques have to be able to be applied to the new
language without the need for extensive linguistic or phonetic knowledge about the
new language and without the need for large amounts of training materials. This is
especially true for the vast number of less prevalent and under resourced languages
in the world.
In the past, phoneme based, language independent acoustic models have been studied for bootstrapping an acoustic model in a new language. These acoustic models
usually have seen multiple languages during training, and work under the assumption that phonemes are pronounced the same across languages. These models can
be applied to a new, unseen language and can be used as a starting base in order
to be adapted to the new language by using comparatively little training material.
In the past it has also been shown that models for acoustic features, describing the
articulator positions for the different phonemes, can also be accurately recognized
across languages and can be trained to become language independent in the same
way as phonemes can. They were combined with phoneme based models and their
behavior on the training languages of the multilingual models was examined.
In this paper we present our first experiments examining the suitability of monolingual and multilingual acoustic features for porting speech recognition systems to
new languages. We combined them with monolingual and multilingual, phoneme
based models in a stream based frame work in order to bootstrap a model in a
new language. The results show that the incorporation of models for articulatory
features into the porting framework significantly improves the performance when
porting ASR systems to new languages, reducing the word error rate by up to 3.7\%
relative.},
}