@inproceedings{0d7b3ac3e0854a5690237ba232524d5a,
  title     = {Corpus Annotation as a Scientific Task},
  author    = {Scott, Donia and Barone, Rossano and Koeling, Rob},
  editor    = {Dogan, {Mehmet Ugur} and Mariani, Joseph and Moreno, Asuncion and Goggi, Sara and Choukri, Khalid and Calzolari, Nicoletta and Odijk, Jan and Declerck, Thierry and Maegaard, Bente and Piperidis, Stelios and Mazo, Helene and Hamon, Olivier},
  booktitle = {Proceedings of the 8th International Conference on Language Resources and Evaluation, {LREC} 2012},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2012},
  pages     = {1481--1485},
  language  = {English},
  keywords  = {Annotation, Electronic patient records, Hedges},
  abstract  = {Annotation studies in CL are generally unscientific: they are mostly not reproducible, make use of too few (and often non-independent) annotators and use guidelines that are often something of a moving target. Additionally, the notion of 'expert annotators' invariably means only that the annotators have linguistic training. While this can be acceptable in some special contexts, it is often far from ideal. This is particularly the case when subtle judgements are required or when, as increasingly, one is making use of corpora originating from technical texts that have been produced by, and intended to be consumed by, an audience of technical experts in the field. We outline a more rigorous approach to collecting human annotations, using as our example a study designed to capture judgements on the meaning of hedge words in medical records.},
  note      = {8th International Conference on Language Resources and Evaluation, LREC 2012 ; Conference date: 21-05-2012 Through 27-05-2012},
}