% Nehring, Feldhus, Ahmed: poster paper in the proceedings of
% Elektronische Sprachsignalverarbeitung 2023, pp. 217--224.
% Publisher name and publisher city are kept in separate fields; the acronym
% in the title is brace-protected so sentence-casing styles keep it uppercase.
@InProceedings{Nehring2023_1196,
author = {Nehring, Jan and Feldhus, Nils and Ahmed, Akhyar},
booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2023},
title = {Adapters for Resource-Efficient Deployment of {NLU} Models},
year = {2023},
editor = {Draxler, Christoph},
month = mar,
pages = {217--224},
publisher = {TUDpress},
address = {Dresden},
abstract = {Modern Transformer-based language models such as BERT are huge
and, therefore, expensive to deploy in practical applications. In environments such
as commercial chatbot-as-a-service platforms that deploy many NLP models in
parallel, less powerful models with a smaller number of parameters are an alternative
to transformers to keep deployment costs down, at the cost of lower accuracy
values. This paper compares different models for Intent Detection concerning their
memory footprint, quality of Intent Detection, and processing speed. Many task-specific
Adapters can share one large transformer model with the Adapter framework.
The deployment of 100 NLU models requires 1 GB of memory for the
proposed BERT+Adapter architecture, compared to 41.78 GB for a BERT-only architecture.},
isbn = {978-3-95908-303-4},
issn = {0940-6832},
keywords = {Poster},
url = {https://www.essv.de/pdf/2023_217_224.pdf},
}