% IMPORTANT: The following is UTF-8 encoded. This means that in the presence
% of non-ASCII characters, it will not work with BibTeX 0.99 or older.
% Instead, you should use an up-to-date BibTeX implementation like “bibtex8” or
% “biber”.
% Journal article (Crystals 15(8), article no. 734, 2025); DESY PubDB record 639637.
% Notes for maintainers:
%  - Compound surnames are written so BibTeX keeps them whole: "Dall Antonia"
%    sits before the comma, and "Monrroy Vilan e Melo" is braced because the
%    lowercase "e" would otherwise be parsed as a von-particle, leaving only
%    "Melo" as the surname.
%    NOTE(review): the publisher may spell the former "Dall'Antonia" - confirm.
%  - Only the acronym XFEL is brace-protected in the title; the rest is left
%    for the bibliography style to recase.
%  - cin, cid, pnm, pid, experiment, typ and reportid are PubDB bookkeeping
%    fields, kept verbatim; standard styles ignore unknown fields.
@article{Chia:639637,
  author     = {Chia, Ervin S. H. and Berberich, Tim B. and Sobolev, Egor
                and Koliyadu, Jayanath C. P. and Adams, Patrick and André,
                Tomas and Dall Antonia, Fabio and Cardoch, Sebastian and De
                Santis, Emiliano and Formosa, Andrew and Hammarstroem,
                Bjoern and Hassett, Michael P. and Kim, Seonmyeong and
                Kloos, Marco and Letrun, Romain and Malka, Janusz and
                {Monrroy Vilan e Melo}, Diogo Filipe and Paporakis, Stefan and
                Sato, Tokushi and Schmidt, Philipp and Turkot, Oleksii and
                Vakili, Mohammad and Valerio, Joana and Yenupuri, Tej Varma
                and You, Tong and de Wijn, Raphaël and Park, Gun-Sik and
                Abbey, Brian and Darmanin, Connie and Bajt, Saša and
                Chapman, Henry N. and Bielecki, Johan and Maia, Filipe R. N.
                C. and Timneanu, Nicusor and Caleman, Carl and Martin,
                Andrew V. and Kurta, Ruslan P. and Sellberg, Jonas A. and
                Loh, Ne-te Duane},
  title      = {Coarse-Graining and Classifying Massive
                High-Throughput {XFEL} Datasets of Crystallization
                in Supercooled Water},
  journal    = {Crystals},
  volume     = {15},
  number     = {8},
  issn       = {2073-4352},
  address    = {Basel},
  publisher  = {MDPI},
  reportid   = {PUBDB-2025-04594},
  pages      = {734},
  year       = {2025},
  abstract   = {Ice crystallization in supercooled water is a complex
                phenomenon with far-reaching implications across scientific
                disciplines, including cloud formation physics and
                cryopreservation. Experimentally studying such complexity
                can be a highly data-driven and data-hungry endeavor because
                of the need to record rare events that cannot be triggered
                on demand. Here, we describe such an experiment comprising
                561 million images of X-ray free-electron laser (XFEL)
                diffraction patterns (2.3 PB raw data) spanning the
                disorder-to-order transition in micrometer-sized supercooled
                water droplets. To effectively analyze these patterns, we
                propose a data reduction (i.e., coarse-graining) and
                dimensionality reduction (i.e., principal component
                analysis) strategy. We show that a simple set of criteria on
                this reduced dataset can efficiently classify these patterns
                in the absence of reference diffraction signatures, which we
                validated using more precise but computationally expensive
                unsupervised machine learning techniques. For hit-finding,
                our strategy attained $98\%$ agreement with our
                cross-validation. We speculate that these strategies may be
                generalized to other types of large high-dimensional
                datasets generated at high-throughput XFEL facilities.},
  cin        = {CFEL-I / FS-ML / $XFEL_DO_DD_DA$ / $XFEL_E1_SPB/SFX$ /
                $XFEL_E2_SEC$ / $XFEL_E2_THE$},
  ddc        = {540},
  cid        = {I:(DE-H253)CFEL-I-20161114 / I:(DE-H253)FS-ML-20120731 /
                $I:(DE-H253)XFEL_DO_DD_DA-20210408$ /
                $I:(DE-H253)XFEL_E1_SPB_SFX-20210408$ /
                $I:(DE-H253)XFEL_E2_SEC-20210408$ /
                $I:(DE-H253)XFEL_E2_THE-20210408$},
  pnm        = {633 - Life Sciences – Building Blocks of Life: Structure
                and Function (POF4-633) / AIM, DFG project
                G:(GEPRIS)390715994 - EXC 2056: CUI: Advanced Imaging of
                Matter (390715994)},
  pid        = {G:(DE-HGF)POF4-633 / G:(GEPRIS)390715994},
  experiment = {EXP:(DE-H253)XFEL-SPB-20150101},
  typ        = {PUB:(DE-HGF)16},
  doi        = {10.3390/cryst15080734},
  url        = {https://bib-pubdb1.desy.de/record/639637},
}