% Conference abstract: Zecevic, Kovacevic, Davidovic (2023), 4th Belgrade
% Bioinformatics Conference, page 37.
% Changes relative to the repository export:
%   - a citation key was added (the export had none, so the entry could
%     not be cited and did not parse);
%   - the entry type is inproceedings (the "conference" alias is legacy);
%   - the proceedings name moved from "journal" to "booktitle";
%   - the "City : Publisher" string was split into address + publisher;
%   - the degenerate page range 37-37 was collapsed to the single page 37.
% NOTE(review): author names are stored as UTF-8; if this file is processed
% by classic 8-bit BibTeX rather than Biber, convert them to LaTeX accent
% escapes. The abstract text is reproduced verbatim from the export.
@inproceedings{zecevic2023mapping,
  author    = {Zečević, Anđelka and Kovačević, Jovana and Davidović, Radoslav},
  title     = {Mapping of Disease Names to Disease Codes based on Natural Language Processing Techniques},
  booktitle = {4th Belgrade Bioinformatics Conference},
  volume    = {4},
  pages     = {37},
  publisher = {Institute of molecular genetics and genetic engineering},
  address   = {Belgrade},
  year      = {2023},
  url       = {https://hdl.handle.net/21.15107/rcub_imagine_1975},
  abstract  = {Information aggregation from various gen, disease, and gen-disease databases such
as DisGeNet, COSMIC, HumsaVar, Orphanet, ClinVar, HPO, and Diseases into a unique
database would enable researchers to analyze and compare valuable domain findings
in a more convenient and systematic way. However, the aggregation poses numerous
challenges due to non-uniform information annotation across the databases. In this work,
we address the problem of mapping a disease name, when needed, into a standardized
disease code (DOID) based on Natural Language Processing text representation
techniques. We examine the benefits and limitations of using off-the-shelf embeddings
such as Med2vec, and language models such as BioBERT, UmlsBERT, and PubMedBERT
in retrieval scenarios with respect to standard full-text search. In addition to qualitative
improvements, we elaborate on the technical requirements and computational
complexities that come with the embracement of language models and semantic search.},
}