% Steiner, Stone, Birkholz: conference paper in the ESSV 2020 proceedings (keyword: Poster).
% Publisher name and publisher city are kept in separate fields; non-ASCII
% characters are written as LaTeX escapes so the entry works with classic
% BibTeX as well as Biber.
@InProceedings{Steiner2020_451,
  author    = {Steiner, Peter and Stone, Simon and Birkholz, Peter},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2020},
  title     = {Note Onset Detection using Echo State Networks},
  year      = {2020},
  editor    = {Wendemuth, Andreas and B{\"o}ck, Ronald and Siegert, Ingo},
  month     = mar,
  pages     = {157--164},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {In music analysis, one of the most fundamental tasks is note onset detection
-- detecting the beginning of new note events. It is the basis for more high-level
tasks, such as beat tracking or tempo detection. The main outline of all approaches
for onset detection is roughly the same: The audio signal is transformed into an
Onset Detection Function (ODF), which is zero for most of the time but has pronounced
peaks in case of onsets. Applying peak picking algorithms on the ODF, the
onset times can be extracted. Currently, Convolutional Neural Networks (CNNs)
define the state of the art. In this paper, a first exploration of Echo State Networks
(ESNs) to obtain an ODF is presented. ESNs have achieved comparable results to
CNNs in several recognition tasks, such as speech and image recognition. Features
were extracted using a bank of filters with a logarithmic frequency spacing. The
feature vectors were fed into the ESN that computed the ODF. Applying a simple
threshold-based peak picking algorithm on the ODF, the onsets were detected.
For the hyperparameter optimization, a dataset with pre-defined splits for an 8-fold
cross validation was used. With all hyperparameters optimized, we reached an
F-Measure of 0.812 using a bidirectional ESN with 8000 neurons.},
  isbn      = {978-3-959081-93-1},
  issn      = {0940-6832},
  keywords  = {Poster},
  url       = {https://www.essv.de/pdf/2020_157_164.pdf},
}