% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% Journal article: Genz, Nair, Nagar and Topf (2025), Protein Science 34(11),
% article number e70327, DOI 10.1002/pro.70327.
% Title casing: the product names AlphaFold2 and AlphaFold3 are brace-protected
% as whole words so that sentence-casing styles keep their capitals; protecting
% single letters in the middle of a word can disturb kerning and hyphenation.
% Fields such as reportid, cin, ddc, cid, pnm, pid, experiment and typ are not
% standard BibTeX fields; standard styles ignore unknown fields, so they are
% kept verbatim as repository metadata.
@ARTICLE{Genz:642193,
      author       = {Genz, Luca R. and Nair, Sanjana and Nagar, Natan and Topf,
                      Maya},
      title        = {Assessing scoring metrics for {AlphaFold2} and
                      {AlphaFold3} protein complex predictions},
      journal      = {Protein Science},
      volume       = {34},
      number       = {11},
      issn         = {0961-8368},
      address      = {Hoboken, NJ},
      publisher    = {Wiley},
      reportid     = {PUBDB-2025-05389},
      pages        = {e70327},
      year         = {2025},
      note         = {DFG CRC1648},
      abstract     = {Recent breakthroughs in AI-driven protein structure
                      prediction have revolutionized structural biology, unlocking
                      new possibilities to model complex biomolecular
                      interactions. We evaluated widely used scoring metrics for
                      assessing models predicted by ColabFold with templates,
                      ColabFold without templates, and AlphaFold3. We benchmarked
                      the optimal cutoffs for these assessment scores using a set
                      of 223 heterodimeric, high-resolution protein structures and
                      their predictions. Our results show that ColabFold with
                      templates and AlphaFold3 perform similarly, and both
                      outperform ColabFold without templates. However, the
                      assessment scores perform best on ColabFold without
                      templates. Furthermore, interface-specific scores are more
                      reliable for evaluating protein complex predictions compared
                      to the corresponding global scores. Notably, ipTM and model
                      confidence achieve the best discrimination between correct
                      and incorrect predictions. Based on our results, we
                      developed a weighted combined score, C2Qscore, to improve
                      model quality assessment. We used C2Qscore to analyze dimers
                      from large assemblies solved by cryoEM, revealing potential
                      limitations of the existing metrics when multiple
                      configurations of heterodimers are possible. This study
                      provides insights into the strengths and weaknesses of
                      current scores and offers guidance for improving protein
                      complex model assessment under realistic use case
                      conditions. C2Qscore has been integrated as a tool into our
                      ChimeraX plug-in PICKLUSTER v.2.0 and is also available as a
                      command-line tool on https://gitlab.com/topf-lab/c2qscore.},
      cin          = {CSSB-LIV/UKE-MT},
      ddc          = {610},
      cid          = {$I:(DE-H253)CSSB-LIV_UKE-MT-20220525$},
      pnm          = {899 - ohne Topic (POF4-899)},
      pid          = {G:(DE-HGF)POF4-899},
      experiment   = {EXP:(DE-MLZ)NOSPEC-20140101},
      typ          = {PUB:(DE-HGF)16},
      doi          = {10.1002/pro.70327},
      url          = {https://bib-pubdb1.desy.de/record/642193},
}