000639637 001__ 639637
000639637 005__ 20251119161937.0
000639637 0247_ $$2doi$$a10.3390/cryst15080734
000639637 0247_ $$2datacite_doi$$a10.3204/PUBDB-2025-04594
000639637 0247_ $$2openalex$$aopenalex:W4413318259
000639637 037__ $$aPUBDB-2025-04594
000639637 041__ $$aEnglish
000639637 082__ $$a540
000639637 1001_ $$0P:(DE-H253)PIP1106448$$aChia, Ervin S. H.$$b0
000639637 245__ $$aCoarse-Graining and Classifying Massive High-Throughput XFEL Datasets of Crystallization in Supercooled Water
000639637 260__ $$aBasel$$bMDPI$$c2025
000639637 3367_ $$2DRIVER$$aarticle
000639637 3367_ $$2DataCite$$aOutput Types/Journal article
000639637 3367_ $$0PUB:(DE-HGF)16$$2PUB:(DE-HGF)$$aJournal Article$$bjournal$$mjournal$$s1761824815_1451513
000639637 3367_ $$2BibTeX$$aARTICLE
000639637 3367_ $$2ORCID$$aJOURNAL_ARTICLE
000639637 3367_ $$00$$2EndNote$$aJournal Article
000639637 520__ $$aIce crystallization in supercooled water is a complex phenomenon with far-reaching implications across scientific disciplines, including cloud formation physics and cryopreservation. Experimentally studying such complexity can be a highly data-driven and data-hungry endeavor because of the need to record rare events that cannot be triggered on demand. Here, we describe such an experiment comprising 561 million images of X-ray free-electron laser (XFEL) diffraction patterns (2.3 PB raw data) spanning the disorder-to-order transition in micrometer-sized supercooled water droplets. To effectively analyze these patterns, we propose a data reduction (i.e., coarse-graining) and dimensionality reduction (i.e., principal component analysis) strategy. We show that a simple set of criteria on this reduced dataset can efficiently classify these patterns in the absence of reference diffraction signatures, which we validated using more precise but computationally expensive unsupervised machine learning techniques. For hit-finding, our strategy attained 98% agreement with our cross-validation. We speculate that these strategies may be generalized to other types of large high-dimensional datasets generated at high-throughput XFEL facilities.
000639637 536__ $$0G:(DE-HGF)POF4-633$$a633 - Life Sciences – Building Blocks of Life: Structure and Function (POF4-633)$$cPOF4-633$$fPOF IV$$x0
000639637 536__ $$0G:(GEPRIS)390715994$$aAIM, DFG project G:(GEPRIS)390715994 - EXC 2056: CUI: Advanced Imaging of Matter (390715994)$$c390715994$$x1
000639637 588__ $$aDataset connected to CrossRef, Journals: bib-pubdb1.desy.de
000639637 693__ $$0EXP:(DE-H253)XFEL-SPB-20150101$$1EXP:(DE-H253)XFEL-20150101$$5EXP:(DE-H253)XFEL-SPB-20150101$$6EXP:(DE-H253)XFEL-SASE1-20150101$$aXFEL$$eSPB: Single Particles, clusters & Biomolecules$$fSASE1$$x0
000639637 7001_ $$0P:(DE-H253)PIP1092139$$aBerberich, Tim B.$$b1
000639637 7001_ $$0P:(DE-H253)PIP1082551$$aSobolev, Egor$$b2
000639637 7001_ $$0P:(DE-H253)PIP1031917$$aKoliyadu, Jayanath C. P.$$b3
000639637 7001_ $$0P:(DE-H253)PIP1095446$$aAdams, Patrick$$b4
000639637 7001_ $$0P:(DE-H253)PIP1103745$$aAndré, Tomas$$b5
000639637 7001_ $$00000-0003-0799-2244$$aAntonia, Fabio Dall$$b6
000639637 7001_ $$0P:(DE-H253)PIP1096981$$aCardoch, Sebastian$$b7
000639637 7001_ $$0P:(DE-H253)PIP1081082$$aDe Santis, Emiliano$$b8
000639637 7001_ $$aFormosa, Andrew$$b9
000639637 7001_ $$0P:(DE-H253)PIP1110082$$aHammarstroem, Bjoern$$b10
000639637 7001_ $$0P:(DE-H253)PIP1105014$$aHassett, Michael P.$$b11
000639637 7001_ $$0P:(DE-H253)PIP1105331$$aKim, Seonmyeong$$b12
000639637 7001_ $$0P:(DE-H253)PIP1082156$$aKloos, Marco$$b13
000639637 7001_ $$0P:(DE-H253)PIP1026177$$aLetrun, Romain$$b14
000639637 7001_ $$0P:(DE-H253)PIP1000004$$aMalka, Janusz$$b15
000639637 7001_ $$0P:(DE-H253)PIP1082228$$aMonrroy Vilan e Melo, Diogo Filipe$$b16
000639637 7001_ $$0P:(DE-H253)PIP1105015$$aPaporakis, Stefan$$b17
000639637 7001_ $$0P:(DE-H253)PIP1021842$$aSato, Tokushi$$b18
000639637 7001_ $$0P:(DE-H253)PIP1014429$$aSchmidt, Philipp$$b19
000639637 7001_ $$0P:(DE-H253)PIP1013504$$aTurkot, Oleksii$$b20
000639637 7001_ $$0P:(DE-H253)PIP1017960$$aVakili, Mohammad$$b21
000639637 7001_ $$0P:(DE-H253)PIP1010569$$aValerio, Joana$$b22
000639637 7001_ $$0P:(DE-H253)PIP1090926$$aYenupuri, Tej Varma$$b23
000639637 7001_ $$0P:(DE-H253)PIP1095687$$aYou, Tong$$b24
000639637 7001_ $$0P:(DE-H253)PIP1089954$$ade Wijn, Raphaël$$b25
000639637 7001_ $$00000-0002-1982-2077$$aPark, Gun-Sik$$b26
000639637 7001_ $$aAbbey, Brian$$b27
000639637 7001_ $$0P:(DE-H253)PIP1028747$$aDarmanin, Connie$$b28
000639637 7001_ $$0P:(DE-H253)PIP1006443$$aBajt, Saša$$b29
000639637 7001_ $$0P:(DE-H253)PIP1006324$$aChapman, Henry N.$$b30
000639637 7001_ $$0P:(DE-H253)PIP1020945$$aBielecki, Johan$$b31
000639637 7001_ $$0P:(DE-H253)PIP1008707$$aMaia, Filipe R. N. C.$$b32
000639637 7001_ $$0P:(DE-H253)PIP1008264$$aTimneanu, Nicusor$$b33
000639637 7001_ $$0P:(DE-H253)PIP1011740$$aCaleman, Carl$$b34
000639637 7001_ $$0P:(DE-H253)PIP1011473$$aMartin, Andrew V.$$b35
000639637 7001_ $$0P:(DE-H253)PIP1007576$$aKurta, Ruslan P.$$b36
000639637 7001_ $$0P:(DE-H253)PIP1014756$$aSellberg, Jonas A.$$b37$$eCorresponding author
000639637 7001_ $$0P:(DE-H253)PIP1013332$$aLoh, Ne-te Duane$$b38$$eCorresponding author
000639637 773__ $$0PERI:(DE-600)2661516-2$$a10.3390/cryst15080734$$gVol. 15, no. 8, p. 734 -$$n8$$p734 $$tCrystals$$v15$$x2073-4352$$y2025
000639637 8564_ $$uhttps://bib-pubdb1.desy.de/record/639637/files/crystals-15-00734.pdf$$yOpenAccess
000639637 8564_ $$uhttps://bib-pubdb1.desy.de/record/639637/files/crystals-15-00734.pdf?subformat=pdfa$$xpdfa$$yOpenAccess
000639637 909CO $$ooai:bib-pubdb1.desy.de:639637$$pdnbdelivery$$pdriver$$pVDB$$popen_access$$popenaire
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1106448$$aExternal Institute$$b0$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1106448$$aEuropean XFEL$$b0$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1092139$$aEuropean XFEL$$b1$$kXFEL.EU
000639637 9101_ $$0I:(DE-588b)235011-7$$6P:(DE-H253)PIP1082551$$aEuropean Molecular Biology Laboratory$$b2$$kEMBL
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1082551$$aEuropean XFEL$$b2$$kXFEL.EU
000639637 9101_ $$0I:(DE-588b)2008985-5$$6P:(DE-H253)PIP1031917$$aDeutsches Elektronen-Synchrotron$$b3$$kDESY
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1031917$$aExternal Institute$$b3$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1031917$$aEuropean XFEL$$b3$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1095446$$aExternal Institute$$b4$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1095446$$aEuropean XFEL$$b4$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1103745$$aExternal Institute$$b5$$kExtern
000639637 9101_ $$0I:(DE-H253)_CFEL-20120731$$6P:(DE-H253)PIP1096981$$aCentre for Free-Electron Laser Science$$b7$$kCFEL
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1096981$$aExternal Institute$$b7$$kExtern
000639637 9101_ $$0I:(DE-H253)_CFEL-20120731$$6P:(DE-H253)PIP1081082$$aCentre for Free-Electron Laser Science$$b8$$kCFEL
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1081082$$aExternal Institute$$b8$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1081082$$aEuropean XFEL$$b8$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1110082$$aExternal Institute$$b10$$kExtern
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1105014$$aExternal Institute$$b11$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1105014$$aEuropean XFEL$$b11$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1105331$$aExternal Institute$$b12$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1105331$$aEuropean XFEL$$b12$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1082156$$aEuropean XFEL$$b13$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1026177$$aEuropean XFEL$$b14$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1000004$$aEuropean XFEL$$b15$$kXFEL.EU
000639637 9101_ $$0I:(DE-588b)235011-7$$6P:(DE-H253)PIP1082228$$aEuropean Molecular Biology Laboratory$$b16$$kEMBL
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1082228$$aEuropean XFEL$$b16$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1082228$$aExternal Institute$$b16$$kExtern
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1105015$$aExternal Institute$$b17$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1105015$$aEuropean XFEL$$b17$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1021842$$aEuropean XFEL$$b18$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1014429$$aEuropean XFEL$$b19$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1013504$$aEuropean XFEL$$b20$$kXFEL.EU
000639637 9101_ $$0I:(DE-588b)2008985-5$$6P:(DE-H253)PIP1017960$$aDeutsches Elektronen-Synchrotron$$b21$$kDESY
000639637 9101_ $$0I:(DE-H253)_CFEL-20120731$$6P:(DE-H253)PIP1017960$$aCentre for Free-Electron Laser Science$$b21$$kCFEL
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1017960$$aEuropean XFEL$$b21$$kXFEL.EU
000639637 9101_ $$0I:(DE-588b)2008985-5$$6P:(DE-H253)PIP1010569$$aDeutsches Elektronen-Synchrotron$$b22$$kDESY
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1010569$$aEuropean XFEL$$b22$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1010569$$aExternal Institute$$b22$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1090926$$aEuropean XFEL$$b23$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1095687$$aEuropean XFEL$$b24$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1095687$$aExternal Institute$$b24$$kExtern
000639637 9101_ $$0I:(DE-588b)235011-7$$6P:(DE-H253)PIP1089954$$aEuropean Molecular Biology Laboratory$$b25$$kEMBL
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1089954$$aExternal Institute$$b25$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1089954$$aEuropean XFEL$$b25$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1028747$$aEuropean XFEL$$b28$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1028747$$aExternal Institute$$b28$$kExtern
000639637 9101_ $$0I:(DE-588b)2008985-5$$6P:(DE-H253)PIP1006443$$aDeutsches Elektronen-Synchrotron$$b29$$kDESY
000639637 9101_ $$0I:(DE-H253)_CFEL-20120731$$6P:(DE-H253)PIP1006443$$aCentre for Free-Electron Laser Science$$b29$$kCFEL
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1006443$$aEuropean XFEL$$b29$$kXFEL.EU
000639637 9101_ $$0I:(DE-588b)2008985-5$$6P:(DE-H253)PIP1006324$$aDeutsches Elektronen-Synchrotron$$b30$$kDESY
000639637 9101_ $$0I:(DE-H253)_CFEL-20120731$$6P:(DE-H253)PIP1006324$$aCentre for Free-Electron Laser Science$$b30$$kCFEL
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1006324$$aEuropean XFEL$$b30$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1020945$$aEuropean XFEL$$b31$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1008707$$aEuropean XFEL$$b32$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1008707$$aExternal Institute$$b32$$kExtern
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1008264$$aExternal Institute$$b33$$kExtern
000639637 9101_ $$0I:(DE-588b)2008985-5$$6P:(DE-H253)PIP1011740$$aDeutsches Elektronen-Synchrotron$$b34$$kDESY
000639637 9101_ $$0I:(DE-H253)_CFEL-20120731$$6P:(DE-H253)PIP1011740$$aCentre for Free-Electron Laser Science$$b34$$kCFEL
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1011473$$aExternal Institute$$b35$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1007576$$aEuropean XFEL$$b36$$kXFEL.EU
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1014756$$aEuropean XFEL$$b37$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1014756$$aExternal Institute$$b37$$kExtern
000639637 9101_ $$0I:(DE-588)1043621512$$6P:(DE-H253)PIP1013332$$aEuropean XFEL$$b38$$kXFEL.EU
000639637 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1013332$$aExternal Institute$$b38$$kExtern
000639637 9131_ $$0G:(DE-HGF)POF4-633$$1G:(DE-HGF)POF4-630$$2G:(DE-HGF)POF4-600$$3G:(DE-HGF)POF4$$4G:(DE-HGF)POF$$aDE-HGF$$bForschungsbereich Materie$$lVon Materie zu Materialien und Leben$$vLife Sciences – Building Blocks of Life: Structure and Function$$x0
000639637 9141_ $$y2025
000639637 915__ $$0StatID:(DE-HGF)0200$$2StatID$$aDBCoverage$$bSCOPUS$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)0160$$2StatID$$aDBCoverage$$bEssential Science Indicators$$d2024-12-18
000639637 915__ $$0LIC:(DE-HGF)CCBY4$$2HGFVOC$$aCreative Commons Attribution CC BY 4.0
000639637 915__ $$0StatID:(DE-HGF)0100$$2StatID$$aJCR$$bCRYSTALS : 2022$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)0501$$2StatID$$aDBCoverage$$bDOAJ Seal$$d2024-04-10T15:26:56Z
000639637 915__ $$0StatID:(DE-HGF)0500$$2StatID$$aDBCoverage$$bDOAJ$$d2024-04-10T15:26:56Z
000639637 915__ $$0StatID:(DE-HGF)0113$$2StatID$$aWoS$$bScience Citation Index Expanded$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)0700$$2StatID$$aFees$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)0150$$2StatID$$aDBCoverage$$bWeb of Science Core Collection$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)9900$$2StatID$$aIF < 5$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)0510$$2StatID$$aOpenAccess
000639637 915__ $$0StatID:(DE-HGF)0030$$2StatID$$aPeer Review$$bDOAJ : Anonymous peer review$$d2024-04-10T15:26:56Z
000639637 915__ $$0StatID:(DE-HGF)0561$$2StatID$$aArticle Processing Charges$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)1150$$2StatID$$aDBCoverage$$bCurrent Contents - Physical, Chemical and Earth Sciences$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)0300$$2StatID$$aDBCoverage$$bMedline$$d2024-12-18
000639637 915__ $$0StatID:(DE-HGF)0199$$2StatID$$aDBCoverage$$bClarivate Analytics Master Journal List$$d2024-12-18
000639637 9201_ $$0I:(DE-H253)CFEL-I-20161114$$kCFEL-I$$lFS-CFEL-1 (Group Leader: Henry Chapman)$$x0
000639637 9201_ $$0I:(DE-H253)FS-ML-20120731$$kFS-ML$$lFS-Arbeitsgruppe$$x1
000639637 9201_ $$0I:(DE-H253)XFEL_DO_DD_DA-20210408$$kXFEL_DO_DD_DA$$lData Analysis$$x2
000639637 9201_ $$0I:(DE-H253)XFEL_E1_SPB_SFX-20210408$$kXFEL_E1_SPB/SFX$$lSPB/SFX$$x3
000639637 9201_ $$0I:(DE-H253)XFEL_E2_SEC-20210408$$kXFEL_E2_SEC$$lSample Environment and Characterisation$$x4
000639637 9201_ $$0I:(DE-H253)XFEL_E2_THE-20210408$$kXFEL_E2_THE$$lTheory$$x5
000639637 980__ $$ajournal
000639637 980__ $$aVDB
000639637 980__ $$aUNRESTRICTED
000639637 980__ $$aI:(DE-H253)CFEL-I-20161114
000639637 980__ $$aI:(DE-H253)FS-ML-20120731
000639637 980__ $$aI:(DE-H253)XFEL_DO_DD_DA-20210408
000639637 980__ $$aI:(DE-H253)XFEL_E1_SPB_SFX-20210408
000639637 980__ $$aI:(DE-H253)XFEL_E2_SEC-20210408
000639637 980__ $$aI:(DE-H253)XFEL_E2_THE-20210408
000639637 9801_ $$aFullTexts