% Paper in the "Elektronische Sprachsignalverarbeitung 2025" proceedings; full text at the `url` field below.
@InProceedings{Sering2025_1269,
  author    = {Sering, Konstantin and Tseng, Yu-Hsiang and Hanulikova, Adriana},
  title     = {Phonetic distances in {L3}-speech},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025},
  editor    = {Grawunder, Sven},
  year      = {2025},
  month     = mar,
  pages     = {323--330},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {Speaking newly learned words in an additional language (L3) can be
               especially challenging when these words contain consonant configurations
               (consonant clusters) that are absent in the speaker's first language/s (L1)
               or any previously learned language (L2). To evaluate these difficulties,
               different methods of acoustic distance measures on the word level are
               explored and contrasted against each other. Two types of features are
               used: Mel frequency cepstral coefficients (MFCC)-based features and
               artificial neural network (ANN)-based features using Wav2Vec2. The two
               feature sets lead to different conclusions in the research question of
               interest. This is problematic as both measures are used in the literature
               as acoustic distance measures and therefore opens the possibility for
               p-hacking. The statistical findings presented here are robust against
               different types of normalisation and different types of calculating the
               distance, namely a Euclidean distance between average vectors and the
               minimal dynamic-time-warping (DTW) distance. The MFCC-based features
               suggest that consonant clusters lead to smaller acoustic distances and
               therefore are easier to produce, while the Wav2Vec2-based features suggest
               that novel words with consonant clusters have a larger acoustic distance.},
  isbn      = {978-3-95908-803-9},
  issn      = {0940-6832},
  keywords  = {Show and Tell},
  url       = {https://www.essv.de/pdf/2025_323_330.pdf},
}