% Conference paper from the "Elektronische Sprachsignalverarbeitung 2018" proceedings.
% The citation key is kept exactly as it was so that existing citations still resolve.
% Abstract text was re-joined where line-break hyphenation had split words.
@InProceedings{Müller2018_386,
  author    = {Müller, Markus and Stüker, Sebastian and Waibel, Alex},
  title     = {Enhancing Multilingual Graphemic {RNN} based {ASR} Systems Using Phone Information},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2018},
  editor    = {Berton, André and Haiber, Udo and Minker, Wolfgang},
  year      = {2018},
  month     = mar,
  pages     = {30--37},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {In the past, we proposed the use of Language Feature Vectors (LFVs)
               to better adapt multilingual speech recognition systems to languages.
               Recently, we applied this method to RNN/CTC based systems. The
               recognition accuracy could be improved by modulating the network using
               LFVs. In this work, we propose an improvement to this approach by
               refining the network architecture as well as the training strategy. We
               first evaluated multiple methods for applying the modulation. As we are
               using bi-directional layers, each unit outputs two values, one per
               direction. Optimizing the combination of the outputs for each direction
               did improve the performance. In addition, we propose a method for
               including phonetic information into the training process of a graphemic
               system. By pre-training layers using phones as targets, the network did
               learn features to discriminate phones. Adding more layers and a two
               stage fine-tuning process using graphemes, we first forced the network
               map phonetic features to graphemes. In the second stage, we allowed the
               network to update the phonetic feature detectors as well. Both methods
               improved the performance of our setup. We evaluated our setup using a
               combination of 4 languages (English, French, German, Turkish), with a
               joint set of acoustic units.},
  isbn      = {978-3-959081-28-3},
  issn      = {0940-6832},
  keywords  = {Signal Processing},
  url       = {https://www.essv.de/pdf/2018_30_37.pdf},
}