% IMPORTANT: The following is UTF-8 encoded.  This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.

% PhD thesis record: Amir Kotobi, Technische Universität Hamburg, 2024.
% reportid, cin, cid, pnm, pid, experiment and typ are not standard BibTeX
% fields (presumably bookkeeping identifiers of the exporting repository);
% standard styles ignore unknown fields.
% The abstract field holds the English abstract followed by the German one.
% NOTE(review): the German part breaks off mid-sentence ("... Entschlüsselung
% der komplizierten.") and several of its sentences look garbled; it is kept
% verbatim here -- confirm against the repository record before reusing it.
@PHDTHESIS{Kotobi:619753,
      author       = {Kotobi, Amir},
      othercontributors = {Meissner, Robert and Bari, Sadia and Huber, Patrick},
      title        = {Dynamic structure investigation and spectra prediction of
                      biomolecules using machine learning techniques},
      school       = {Technische Universität Hamburg},
      type         = {Dissertation},
      publisher    = {TUHH Universitätsbibliothek},
      reportid     = {PUBDB-2024-07888},
      pages        = {141},
      year         = {2024},
      abstract     = {The investigation of biomolecular structures and the
                      prediction of their spectra using experimental and
                      theoretical studies in the gas phase represent fundamental
                      steps in comprehending their intrinsic properties and
                      biological functions. Nonetheless, the complexity of the
                      potential energy surface of biomolecules, combined with
                      limitations in computational resources, limits the
                      interpretation of experimental observations. Integrating
                      supervised and unsupervised machine learning (ML) techniques
                      into theoretical calculations is considered as an effective
                      way to address these challenges. Infrared (IR) and X-ray
                      absorption spectroscopy (XAS) has proven to be powerful
                      experimental techniques to study the electronic and spatial
                      structure of biomolecules such as peptides and proteins.
                      Reproducing and validating the features observed in spectra
                      resulting from these experiments often requires the use of
                      sophisticated ab initio calculations and comprehensive
                      understanding of biomolecules’ configurational space. In
                      this thesis, I introduced a novel approach in interpretation
                      of IR experimental spectrum of a peptide which aims
                      enhancing the exploratory power of searching configurational
                      space by combining REMD simulations, unsupervised machine
                      learning, and ab initio calculations. This scheme relies on
                      a set of structural descriptors and data-driven clustering
                      technique which accounts for canonical ensemble of real
                      experimental condition to obtain an accurate computed
                      spectrum. We show that by partitioning the configurational
                      space into subensembles of similar conformations i.e.
                      clusters, an accurate IR spectrum can be calculated by
                      averaging the IR contribution of each representative
                      conformer in each cluster, weighted according to the
                      population of each cluster. While this approach unravels
                      important fingerprints of experimental spectroscopic data,
                      the calculation of IR and particularly XAS spectra, due to
                      its inherently expensive theoretical computation, is often
                      computationally prohibitive task for even medium-sized
                      molecules. To remedy the computational obstacles associated
                      with spectra prediction, we develop a data-driven
                      supervised ML frameworks, i.e. graph neural networks which
                      are trained on a custom-generated XAS dataset to find a
                      mapping between structures and spectroscopic signals, thus
                      bypassing the need for expensive ab initio quantum chemistry
                      calculations. To ensure the interpretability of GNN
                      models’ predictions, we employ feature attribution to
                      determine the respective contributions of various atoms in
                      the molecules to the peaks observed in the XAS spectrum.
                      Within this approach, we show that it is possible to link
                      the peaks observed in the spectra to certain core and
                      virtual orbitals from the quantum chemical calculations and
                      obtain an in-depth understanding of the ML predicted XAS
                      spectrum. The results presented in this thesis show that the
                      integration of supervised and unsupervised ML techniques can
                      effectively enhance the interpretation of spectroscopic data
                      and make efficient use of the expensive ab initio
                      calculations. Die Infrarot- und
                      Röntgenabsorptionsspektroskopie haben sich als
                      leistungsfähige experimentelle Instrumente zur die
                      elektronischen und strukturellen Feinheiten von
                      Biomolekülen, insbesondere Peptiden und Proteinen,
                      aufzuklären. Parallel dazu haben die bemerkenswerten
                      Fortschritte bei den Rechenkapazitäten die Fähigkeit
                      beschleunigt die Fähigkeit, Chemie, Physik und maschinelles
                      Lernen in einer echten Symbiose zu kombinieren, wodurch die
                      präzise Modellierung und Verständnis komplexer
                      biomolekularer Prozesse auf atomarer Ebene und die
                      Validierung von experimentell beobachteten
                      Spektralmerkmalen. Doch die inhärente Komplexität von
                      Peptiden und Proteinen, gekoppelt mit den
                      Rechenanforderungen quantenmechanischer Methoden für große
                      Systeme stellen jedoch eine große Herausforderung dar, wenn
                      es darum geht, die inhärenten Eigenschaften dieser
                      Biomolekülen. Um diese Herausforderungen zu bewältigen,
                      ist die Einbeziehung von überwachten und unüberwachten
                      Techniken des maschinellen Lernens in die
                      Molekulardynamik-Simulations-Toolbox erleichtert die das
                      komplexe Zusammenspiel interatomarer und intermolekularer
                      Wechselwirkungen zu entschlüsseln und den Weg für die den
                      Weg für die Vorhersage verschiedener Eigenschaften dieser
                      Systeme. Diese Dissertation befasst sich mit Feature und
                      Techniken des unüberwachten maschinellen Lernens (z. B.
                      Clustering und Dimensionality-Reduction), die auf
                      atomistische Datensätze angewandt werden, um zu
                      untersuchen, wie diese Techniken die komplexe
                      Strukturlandschaft eines Modellpeptids beleuchten können.
                      Darüber hinaus werden in dieser Arbeit Graph neuronale
                      Netze als leistungsstarker und effizienter Ansatz zur
                      Entschlüsselung der komplizierten.},
      keywords     = {Machine learning (Other) / Infrared (IR) (Other) / X-ray
                      absorption spectroscopy (XAS) (Other) / Graph neural
                      networks (GNN) (Other) / Explainability AI (Other) / Natural
                      Sciences and Mathematics::540: Chemistry (Other) / Natural
                      Sciences and Mathematics::570: Life Sciences, Biology
                      (Other) / Natural Sciences and Mathematics::510: Mathematics
                      (Other)},
      cin          = {FS-BIG},
      cid          = {I:(DE-H253)FS-BIG-20220318},
      pnm          = {633 - Life Sciences – Building Blocks of Life: Structure
                      and Function (POF4-633) / HIDSS-0002 - DASHH: Data Science
                      in Hamburg - Helmholtz Graduate School for the Structure of
                      Matter $(2019_IVF-HIDSS-0002)$ / PHGS, VH-GS-500 - PIER
                      Helmholtz Graduate School $(2015_IFV-VH-GS-500)$},
      pid          = {G:(DE-HGF)POF4-633 / $G:(DE-HGF)2019_IVF-HIDSS-0002$ /
                      $G:(DE-HGF)2015_IFV-VH-GS-500$},
      experiment   = {EXP:(DE-MLZ)NOSPEC-20140101 /
                      EXP:(DE-MLZ)External-20140101},
      typ          = {PUB:(DE-HGF)11},
      doi          = {10.15480/882.9689},
      url          = {https://bib-pubdb1.desy.de/record/619753},
}