% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Review article in Proteomics 25(1--2), 2025. The "pages" field holds the
% article number (e-locator), not a page range.
% The title is stored in plain Title Case with no brace protection: it has no
% proper nouns or acronyms, so the bibliography style decides the final casing.
% reportid, cin, ddc, cid, pnm, pid, experiment, typ and pubmed are
% non-standard fields, presumably repository metadata from the export behind
% the "url" field; standard styles ignore unknown fields.
@article{Schumann:623229,
  author     = {Schumann, Yannis and Gocke, Antonia and Neumann, Julia E.},
  title      = {Computational Methods for Data Integration and
                Imputation of Missing Values in Omics Datasets},
  journal    = {Proteomics},
  volume     = {25},
  number     = {1--2},
  pages      = {e202400100},
  year       = {2025},
  issn       = {1615-9853},
  publisher  = {Wiley VCH},
  address    = {Weinheim},
  doi        = {10.1002/pmic.202400100},
  url        = {https://bib-pubdb1.desy.de/record/623229},
  pubmed     = {pmid:39740174},
  note       = {J.E.N. is funded by the DFG (Emmy Noether program).},
  abstract   = {Molecular profiling of different omic-modalities (e.g., DNA
                methylomics, transcriptomics, proteomics) in biological
                systems represents the basis for research and clinical
                decision-making. Measurement-specific biases, so-called
                batch effects, often hinder the integration of independently
                acquired datasets, and missing values further hamper the
                applicability of typical data processing algorithms. In
                addition to careful experimental design, well-defined
                standards in data acquisition and data exchange, the
                alleviation of these phenomena particularly requires a
                dedicated data integration and preprocessing pipeline. This
                review aims to give a comprehensive overview of
                computational methods for data integration and missing value
                imputation for omic data analyses. We provide formal
                definitions for missing value mechanisms and propose a novel
                statistical taxonomy for batch effects, especially in the
                presence of missing data. Based on an automated document
                search and systematic literature review, we describe 32
                distinct data integration methods from five main
                methodological categories, as well as 37 algorithms for
                missing value imputation from five separate categories.
                Additionally, this review highlights multiple quantitative
                evaluation methods to aid researchers in selecting a
                suitable set of methods for their work. Finally, this work
                provides an integrated discussion of the relevance of batch
                effects and missing values in omics with corresponding
                method recommendations. We then propose a comprehensive
                three-step workflow from the study conception to final data
                analysis and deduce perspectives for future research.
                Eventually, we present a comprehensive flow chart as well as
                exemplary decision trees to aid practitioners in the
                selection of specific approaches for imputation and data
                integration in their studies.},
  reportid   = {PUBDB-2025-00661},
  cin        = {IT},
  ddc        = {540},
  cid        = {I:(DE-H253)IT-20120731},
  pnm        = {623 - Data Management and Analysis (POF4-623)},
  pid        = {G:(DE-HGF)POF4-623},
  experiment = {EXP:(DE-MLZ)NOSPEC-20140101},
  typ        = {PUB:(DE-HGF)16},
}