% Conference paper in the ESSV 2021 proceedings volume (booktitle below); full text at the url field.
@InProceedings{Hillmann2021_1124,
  author    = {Hillmann, Stefan and Himmelsbach, Tilo and Weiss, Benjamin},
  title     = {Comparison of Training Behaviour and Performance of Reinforcement Learning based Policies for Dialogue Management},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2021},
  editor    = {Hillmann, Stefan and Weiss, Benjamin and Michael, Thilo and Möller, Sebastian},
  year      = {2021},
  month     = mar,
  pages     = {239--246},
  publisher = {TUDpress},
  address   = {Dresden},
  abstract  = {We present the results of a laborious comparison of four different reinforcement learning algorithms that are used to train policies for dialogue management. We have trained 32 policies by varying the concept error rate, the number of user dialogue acts, and the number of training dialogues. Data about the training behavior and performance of the trained policy in the evaluation are presented. Actor-critic leads to very good task success rates and notable shorter dialogues among the evaluated algorithms (actor-critic, REINFORCE, Q-Learning, and WoLF-PHC).},
  isbn      = {978-3-95908-227-3},
  issn      = {0940-6832},
  keywords  = {Sprachdialog},
  url       = {https://www.essv.de/pdf/2021_239_246.pdf},
}