% SHROOM-INDElab system paper for SemEval-2024 Task 6 (workshop proceedings entry).
% Field values are in English; names use the unambiguous "Last, First" form.
% NOTE(review): address holds the publisher country as given in the original record,
% not a city; confirm the intended publisher location before relying on it.
@inproceedings{26fe72f2135c44689042f8b4f941a471,
  author    = {Allen, Bradley P. and Polat, Fina and Groth, Paul},
  title     = {{SHROOM-INDElab} at {SemEval-2024} Task 6: Zero- and Few-Shot {LLM}-Based Classification for Hallucination Detection},
  booktitle = {SemEval 2024 - 18th International Workshop on Semantic Evaluation, Proceedings of the Workshop},
  editor    = {Ojha, Atul Kr. and Do{\u{g}}ru{\"o}z, A. Seza and Tayyar Madabushi, Harish and {Da San Martino}, Giovanni and Rosenthal, Sara and Ros{\'a}, Aiala},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2024},
  pages     = {839--844},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2024 Association for Computational Linguistics.; 18th International Workshop on Semantic Evaluation, SemEval 2024, co-located with the 2024 Annual Conference of the North American Chapter of the Association for Computational Linguistics, NAACL 2024 ; Conference date: 20-06-2024 Through 21-06-2024},
  abstract  = {We describe the University of Amsterdam Intelligent Data Engineering Lab team's entry for the SemEval-2024 Task 6 competition. The SHROOM-INDElab system builds on previous work on using prompt programming and in-context learning with large language models (LLMs) to build classifiers for hallucination detection, and extends that work through the incorporation of context-specific definition of task, role, and target concept, and automated generation of examples for use in a few-shot prompting approach. The resulting system achieved fourth-best and sixth-best performance in the model-agnostic track and model-aware tracks for Task 6, respectively, and evaluation using the validation sets showed that the system's classification decisions were consistent with those of the crowd-sourced human labellers. We further found that a zero-shot approach provided better accuracy than a few-shot approach using automatically generated examples. Code for the system described in this paper is available on Github.},
}