@comment{
  EMNLP 2023 Industry Track paper; cleaned-up repository export.
  - Author names use plain Last, First form; the export had backslash-escaped
    braces around multi-word name parts, which print as literal braces.
  - NOTE(review): the split of the fourth author (family name Gomez, given
    names Agnes Masip) is kept exactly as exported; confirm against the paper.
  - Event name, event dates and copyright line were moved out of the printed
    note field into eventtitle, eventdate and copyright. Classic BibTeX styles
    ignore these fields; biblatex uses eventtitle and eventdate.
  - The series field was dropped because it duplicated booktitle verbatim.
}
@inproceedings{ae8c23b644f24117a68180f9deec6146,
  author     = {Li, Dan and Zhu, Zi Long and van de Loo, Janneke and G{\'o}mez, Agn{\'e}s Masip and Yadav, Vikrant and Tsatsaronis, Georgios and Afzal, Zubair},
  title      = {Enhancing Extreme Multi-Label Text Classification: Addressing Challenges in Model, Data, and Evaluation},
  editor     = {Wang, Mingxuan and Zitouni, Imed},
  booktitle  = {EMNLP 2023 - 2023 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Industry Track},
  eventtitle = {2023 Conference on Empirical Methods in Natural Language Processing: Industry Track, EMNLP 2023},
  eventdate  = {2023-12-06/2023-12-10},
  pages      = {313--321},
  publisher  = {Association for Computational Linguistics (ACL)},
  address    = {United States},
  month      = dec,
  year       = {2023},
  doi        = {10.18653/v1/2023.emnlp-industry.30},
  language   = {English},
  copyright  = {{\textcopyright} 2023 Association for Computational Linguistics.},
  abstract   = {Extreme multi-label text classification is a prevalent task in industry, but it frequently encounters challenges in terms of machine learning perspectives, including model limitations, data scarcity, and time-consuming evaluation. This paper aims to mitigate these issues by introducing novel approaches. Firstly, we propose a label ranking model as an alternative to the conventional SciBERT-based classification model, enabling efficient handling of large-scale labels and accommodating new labels. Secondly, we present an active learning-based pipeline that addresses the data scarcity of new labels during the update of a classification system. Finally, we introduce ChatGPT to assist with model evaluation. Our experiments demonstrate the effectiveness of these techniques in enhancing the extreme multi-label text classification task.},
}