% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Review article (Proteomics 25(1--2), 2025); the url field points to record
% 623229 on bib-pubdb1.desy.de.  The fields reportid, cin, ddc, cid, pnm, pid,
% experiment, typ and pubmed are not standard BibTeX fields; standard styles
% silently ignore unknown fields, so they act as metadata only (presumably
% repository bookkeeping -- confirm before removing or renaming them).
% The title carries no brace protection because it contains no acronyms or
% proper nouns; casing is left to the bibliography style.
@article{Schumann:623229,
      author       = {Schumann, Yannis and Gocke, Antonia and Neumann, Julia E.},
      title        = {Computational Methods for Data Integration and
                      Imputation of Missing Values in Omics Datasets},
      journal      = {Proteomics},
      volume       = {25},
      number       = {1--2},
      issn         = {1615-9853},
      address      = {Weinheim},
      publisher    = {Wiley-VCH},
      reportid     = {PUBDB-2025-00661},
      pages        = {e202400100},
      year         = {2025},
      note         = {J.E.N. is funded by the DFG (Emmy Noether program).},
      abstract     = {Molecular profiling of different omic-modalities (e.g., DNA
                      methylomics, transcriptomics, proteomics) in biological
                      systems represents the basis for research and clinical
                      decision-making. Measurement-specific biases, so-called
                      batch effects, often hinder the integration of independently
                      acquired datasets, and missing values further hamper the
                      applicability of typical data processing algorithms. In
                      addition to careful experimental design, well-defined
                      standards in data acquisition and data exchange, the
                      alleviation of these phenomena particularly requires a
                      dedicated data integration and preprocessing pipeline. This
                      review aims to give a comprehensive overview of
                      computational methods for data integration and missing value
                      imputation for omic data analyses. We provide formal
                      definitions for missing value mechanisms and propose a novel
                      statistical taxonomy for batch effects, especially in the
                      presence of missing data. Based on an automated document
                      search and systematic literature review, we describe 32
                      distinct data integration methods from five main
                      methodological categories, as well as 37 algorithms for
                      missing value imputation from five separate categories.
                      Additionally, this review highlights multiple quantitative
                      evaluation methods to aid researchers in selecting a
                      suitable set of methods for their work. Finally, this work
                      provides an integrated discussion of the relevance of batch
                      effects and missing values in omics with corresponding
                      method recommendations. We then propose a comprehensive
                      three-step workflow from the study conception to final data
                      analysis and deduce perspectives for future research.
                      Eventually, we present a comprehensive flow chart as well as
                      exemplary decision trees to aid practitioners in the
                      selection of specific approaches for imputation and data
                      integration in their studies.},
      cin          = {IT},
      ddc          = {540},
      cid          = {I:(DE-H253)IT-20120731},
      pnm          = {623 - Data Management and Analysis (POF4-623)},
      pid          = {G:(DE-HGF)POF4-623},
      experiment   = {EXP:(DE-MLZ)NOSPEC-20140101},
      typ          = {PUB:(DE-HGF)16},
      pubmed       = {pmid:39740174},
      doi          = {10.1002/pmic.202400100},
      url          = {https://bib-pubdb1.desy.de/record/623229},
}