% Conference paper from the NLDB 2018 proceedings (Lecture Notes in Computer Science).
% Cleaned from a research-portal export: the citation key is kept unchanged so
% existing \cite commands still resolve. Funding, copyright and conference data
% live in their own fields instead of `note`, so styles do not print them as
% free text. `funding` and `copyright` are not standard fields and are ignored
% by standard styles; `eventtitle`/`eventdate` are biblatex fields and are
% ignored by classic BibTeX styles.
@inproceedings{086ec7a182964b7681faab785dbaa859,
  author     = {Thorne, Camilo and Klinger, Roman},
  title      = {On the Semantic Similarity of Disease Mentions in {MEDLINE}{\textregistered} and {Twitter}},
  editor     = {Meziane, Farid and Silberztein, Max and Atigui, Faten and Kornyshova, Elena and Metais, Elisabeth},
  booktitle  = {Natural Language Processing and Information Systems -- 23rd International Conference on Applications of Natural Language to Information Systems, {NLDB} 2018, Proceedings},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer International Publishing},
  year       = {2018},
  pages      = {324--332},
  doi        = {10.1007/978-3-319-91947-8_34},
  isbn       = {9783319919461},
  language   = {English},
  eventtitle = {23rd International Conference on Natural Language and Information Systems, {NLDB} 2018},
  eventdate  = {2018-06-13/2018-06-15},
  keywords   = {Disease names, Medline{\textregistered}, Social media mining, Twitter},
  abstract   = {Social media mining is becoming an important technique to track the spread of infectious diseases and to understand specific needs of people affected by a medical condition. A common approach is to select a variety of synonyms for a disease derived from scientific literature to then retrieve social media posts for subsequent analysis. With this paper, we question the underlying assumption that user-generated text always makes use of such names, or assigns them the same meaning as in scientific literature. We analyze the most frequently used concepts in MEDLINE{\textregistered} for semantic similarity to Twitter use and compare their normalized entropy and cosine similarities based on a simple distributional model. We find that diseases are referred to in semantically different ways in both corpora, a difference that increases in inverse proportion to the frequency of the synonym, and of the commonness of the disease or condition. These results imply that, when sampling social media for disease-related micro-blogs, query expressions must be carefully chosen, and even more so for rarely mentioned diseases or conditions.},
  funding    = {This work was supported by a grant from the Ministry of Science, Research and Arts of Baden-W{\"u}rttemberg to Roman Klinger.},
  copyright  = {{\textcopyright} 2018, Springer International Publishing AG, part of Springer Nature.},
}