% Conference paper in the ESSV 2021 proceedings volume, pages 77--86.
@InProceedings{Jokisch2021_1103,
author = {Oliver Jokisch and Tilo Strutz and Alexander Leipnitz and Ingo Siegert and Andrey Ronzhin},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021},
title = {Audio and Video Processing of {UAV}-Based Signals in the {Harmonic} Project},
year = {2021},
editor = {Stefan Hillmann and Benjamin Weiss and Thilo Michael and Sebastian Möller},
month = mar,
pages = {77--86},
publisher = {TUDpress},
address = {Dresden},
abstract = {The article summarizes selected results of audio and video signal processing in a joint research project on agricultural mission data (HARMONIC) from our previous publications. We compare the results of audio-processing tasks, based on single-channel recordings directly at a small unmanned aerial vehicle (UAV, drone) with the improvements using a lightweight microphone array at the drone, and post-filtering methods. To demonstrate the practical relevance, we survey the speech-recognition performance by simulating human speech commands to a hovering UAV, with limited success due to remaining interference of speech and ego-noise frequencies, also in the improved setup. The video-processing tasks involve the classification of agricultural textures (e.g. different fields, wood areas, or paths, in which we achieve an accuracy of 88.7\%) and the detection of typical objects, such as a land machine, animal or person by convolutional neural networks (CNN). Utilizing an image-scaling approach for far-distance objects, the mean average precision in the detection of “small persons” in a large image is improving up to 5...8\%, compared to the CNN baseline approach in the reference datasets AgriDrone and VisDrone. Finally, we discuss the potentialities for a combined use of audio and video data, to enable improved methods for human-drone interaction.},
isbn = {978-3-959082-27-3},
issn = {0940-6832},
keywords = {Postersession 1},
url = {https://www.essv.de/pdf/pdf/2021_77_86.pdf},
}