000614260 001__ 614260
000614260 005__ 20250723172352.0
000614260 0247_ $$2doi$$a10.1039/D4DD00001C
000614260 0247_ $$2datacite_doi$$a10.3204/PUBDB-2024-05800
000614260 0247_ $$2pmid$$a38756225
000614260 0247_ $$2WOS$$aWOS:001196386100001
000614260 0247_ $$2openalex$$aopenalex:W4393240154
000614260 037__ $$aPUBDB-2024-05800
000614260 041__ $$aEnglish
000614260 082__ $$a004
000614260 1001_ $$00000-0002-0298-6016$$aKjær, Emil T. S.$$b0
000614260 245__ $$aMLstructureMining: a machine learning tool for structure identification from X-ray pair distribution functions
000614260 260__ $$aWashington DC$$bRoyal Society of Chemistry$$c2024
000614260 3367_ $$2DRIVER$$aarticle
000614260 3367_ $$2DataCite$$aOutput Types/Journal article
000614260 3367_ $$0PUB:(DE-HGF)16$$2PUB:(DE-HGF)$$aJournal Article$$bjournal$$mjournal$$s1737383745_1543908
000614260 3367_ $$2BibTeX$$aARTICLE
000614260 3367_ $$2ORCID$$aJOURNAL_ARTICLE
000614260 3367_ $$00$$2EndNote$$aJournal Article
000614260 520__ $$aSynchrotron X-ray techniques are essential for studies of the intrinsic relationship between synthesis, structure, and properties of materials. Modern synchrotrons can produce up to 1 petabyte of data per day. Such amounts of data can speed up materials development, but also comes with a staggering growth in workload, as the data generated must be stored and analyzed. We present an approach for quickly identifying an atomic structure model from pair distribution function (PDF) data from (nano)crystalline materials. Our model, MLstructureMining, uses a tree-based machine learning (ML) classifier. MLstructureMining has been trained to classify chemical structures from a PDF and gives a top-3 accuracy of 99% on simulated PDFs not seen during training, with a total of 6062 possible classes. We also demonstrate that MLstructureMining can identify the chemical structure from experimental PDFs from nanoparticles of CoFe$_2$O$_4$ and CeO$_2$, and we show how it can be used to treat an in situ PDF series collected during Bi$_2$Fe$_4$O$_9$ formation. Additionally, we show how MLstructureMining can be used in combination with the well-known methods, principal component analysis (PCA) and non-negative matrix factorization (NMF) to analyze data from in situ experiments. MLstructureMining thus allows for real-time structure characterization by screening vast quantities of crystallographic information files in seconds.
000614260 536__ $$0G:(DE-HGF)POF4-6G3$$a6G3 - PETRA III (DESY) (POF4-6G3)$$cPOF4-6G3$$fPOF IV$$x0
000614260 536__ $$0G:(GEPRIS)429360100$$aDFG project G:(GEPRIS)429360100 - Studien an in-situ Daten der totalen Streufunktion: Bildungsmechanismen von ternären multiferroischen Bismutferraten (429360100)$$c429360100$$x1
000614260 542__ $$2Crossref$$i2024-03-27$$uhttp://creativecommons.org/licenses/by-nc/3.0/
000614260 588__ $$aDataset connected to CrossRef, Journals: bib-pubdb1.desy.de
000614260 693__ $$0EXP:(DE-H253)P-P02.1-20150101$$1EXP:(DE-H253)PETRAIII-20150101$$6EXP:(DE-H253)P-P02.1-20150101$$aPETRA III$$fPETRA Beamline P02.1$$x0
000614260 693__ $$0EXP:(DE-H253)P-P21.1-20150101$$1EXP:(DE-H253)PETRAIII-20150101$$6EXP:(DE-H253)P-P21.1-20150101$$aPETRA III$$fPETRA Beamline P21.1$$x1
000614260 7001_ $$0P:(DE-H253)PIP1081420$$aAnker, Andy S.$$b1
000614260 7001_ $$0P:(DE-H253)PIP1090396$$aKirsch, Andrea$$b2
000614260 7001_ $$aLajer, Joakim$$b3
000614260 7001_ $$0P:(DE-H253)PIP1086362$$aAalling-Frederiksen, Olivia$$b4
000614260 7001_ $$0P:(DE-H253)PIP1029031$$aBillinge, Simon J. L.$$b5
000614260 7001_ $$0P:(DE-H253)PIP1016581$$aJensen, Kirsten Marie$$b6$$eCorresponding author
000614260 77318 $$2Crossref$$3journal-article$$a10.1039/d4dd00001c$$bRoyal Society of Chemistry (RSC)$$d2024-01-01$$n5$$p908-918$$tDigital Discovery$$v3$$x2635-098X$$y2024
000614260 773__ $$0PERI:(DE-600)3142965-8$$a10.1039/D4DD00001C$$gVol. 3, no. 5, p. 908 - 918$$n5$$p908-918$$tDigital discovery$$v3$$x2635-098X$$y2024
000614260 8564_ $$uhttps://bib-pubdb1.desy.de/record/614260/files/d4dd00001c.pdf$$yOpenAccess
000614260 8564_ $$uhttps://bib-pubdb1.desy.de/record/614260/files/d4dd00001c.pdf?subformat=pdfa$$xpdfa$$yOpenAccess
000614260 909CO $$ooai:bib-pubdb1.desy.de:614260$$pdnbdelivery$$pdriver$$pVDB$$popen_access$$popenaire
000614260 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1081420$$aExternal Institute$$b1$$kExtern
000614260 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1090396$$aExternal Institute$$b2$$kExtern
000614260 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1086362$$aExternal Institute$$b4$$kExtern
000614260 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1029031$$aExternal Institute$$b5$$kExtern
000614260 9101_ $$0I:(DE-HGF)0$$6P:(DE-H253)PIP1016581$$aExternal Institute$$b6$$kExtern
000614260 9131_ $$0G:(DE-HGF)POF4-6G3$$1G:(DE-HGF)POF4-6G0$$2G:(DE-HGF)POF4-600$$3G:(DE-HGF)POF4$$4G:(DE-HGF)POF$$aDE-HGF$$bForschungsbereich Materie$$lGroßgeräte: Materie$$vPETRA III (DESY)$$x0
000614260 9141_ $$y2024
000614260 915__ $$0LIC:(DE-HGF)CCBYNC4$$2HGFVOC$$aCreative Commons Attribution-NonCommercial CC BY-NC 4.0
000614260 915__ $$0StatID:(DE-HGF)0510$$2StatID$$aOpenAccess
000614260 915__ $$0StatID:(DE-HGF)0200$$2StatID$$aDBCoverage$$bSCOPUS$$d2024-12-20
000614260 915__ $$0StatID:(DE-HGF)0300$$2StatID$$aDBCoverage$$bMedline$$d2024-12-20
000614260 915__ $$0StatID:(DE-HGF)0501$$2StatID$$aDBCoverage$$bDOAJ Seal$$d2024-07-17T09:51:18Z
000614260 915__ $$0StatID:(DE-HGF)0500$$2StatID$$aDBCoverage$$bDOAJ$$d2024-07-17T09:51:18Z
000614260 915__ $$0StatID:(DE-HGF)0030$$2StatID$$aPeer Review$$bDOAJ : Anonymous peer review$$d2024-07-17T09:51:18Z
000614260 915__ $$0StatID:(DE-HGF)0199$$2StatID$$aDBCoverage$$bClarivate Analytics Master Journal List$$d2024-12-20
000614260 915__ $$0StatID:(DE-HGF)0112$$2StatID$$aWoS$$bEmerging Sources Citation Index$$d2024-12-20
000614260 915__ $$0StatID:(DE-HGF)0150$$2StatID$$aDBCoverage$$bWeb of Science Core Collection$$d2024-12-20
000614260 9201_ $$0I:(DE-H253)HAS-User-20120731$$kDOOR ; HAS-User$$lDOOR-User$$x0
000614260 980__ $$ajournal
000614260 980__ $$aVDB
000614260 980__ $$aI:(DE-H253)HAS-User-20120731
000614260 980__ $$aUNRESTRICTED
000614260 9801_ $$aFullTexts
000614260 999C5 $$1Christiansen$$2Crossref$$9-- missing cx lookup --$$a10.1039/D0NA00120A$$p2234 -$$tNanoscale Adv.$$v2$$y2020
000614260 999C5 $$1Billinge$$2Crossref$$9-- missing cx lookup --$$a10.1126/science.1135080$$p561 -$$tScience$$v316$$y2007
000614260 999C5 $$1Juelsholt$$2Crossref$$9-- missing cx lookup --$$a10.1039/D1NR05991B$$p20144 -$$tNanoscale$$v13$$y2021
000614260 999C5 $$1Billinge$$2Crossref$$9-- missing cx lookup --$$a10.1039/B309577K$$p749 -$$tChem. Commun.$$y2004
000614260 999C5 $$1Lindahl Christiansen$$2Crossref$$9-- missing cx lookup --$$a10.1107/S1600576719016832$$p148 -$$tJ. Appl. Crystallogr.$$v53$$y2020
000614260 999C5 $$1Farrow$$2Crossref$$oFarrow 2007$$y2007
000614260 999C5 $$1Juhás$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273315014473$$p562 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v71$$y2015
000614260 999C5 $$1Proffen$$2Crossref$$9-- missing cx lookup --$$a10.1107/S002188989600934X$$p171 -$$tJ. Appl. Crystallogr.$$v30$$y1997
000614260 999C5 $$1Coelho$$2Crossref$$9-- missing cx lookup --$$a10.1107/S1600576718000183$$p210 -$$tJ. Appl. Crystallogr.$$v51$$y2018
000614260 999C5 $$1Yang$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273320002028$$p395 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v76$$y2020
000614260 999C5 $$1Yang$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273320013066$$p2 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v77$$y2021
000614260 999C5 $$1Banerjee$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273319013214$$p24 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v76$$y2020
000614260 999C5 $$1Kjær$$2Crossref$$oKjær 2022$$y2022
000614260 999C5 $$1Anker$$2Crossref$$9-- missing cx lookup --$$a10.1038/s41524-022-00896-3$$p213 -$$tnpj Comput. Mater.$$v8$$y2022
000614260 999C5 $$1Gu$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273323000761$$p203 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v79$$y2023
000614260 999C5 $$1Geddes$$2Crossref$$9-- missing cx lookup --$$a10.1039/C9CC06753A$$p13346 -$$tChem. Commun.$$v55$$y2019
000614260 999C5 $$1Liu$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273319005606$$p633 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v75$$y2019
000614260 999C5 $$1Kjær$$2Crossref$$9-- missing cx lookup --$$a10.1039/D2DD00086E$$p69 -$$tDigital Discovery$$v2$$y2023
000614260 999C5 $$1Anker$$2Crossref$$9-- missing cx lookup --$$a10.26434/chemrxiv.12662222.v1$$uA. S.Anker , E. T.Kjaer , E. B.Dam , S. J.Billinge , K. M.Jensen and R.Selvan , Proceedings of the 16th International Workshop on Mining and Learning with Graphs (MLG) , 2020 , 10.26434/chemrxiv.12662222.v1$$y2020
000614260 999C5 $$1Gražulis$$2Crossref$$9-- missing cx lookup --$$a10.1107/S0021889809016690$$p726 -$$tJ. Appl. Crystallogr.$$v42$$y2009
000614260 999C5 $$1Hall$$2Crossref$$9-- missing cx lookup --$$a10.1107/S010876739101067X$$p655 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v47$$y1991
000614260 999C5 $$1Myers$$2Crossref$$tResearch design and statistical analysis Routledge$$uJ.Myers , A.Well and R.Lorch Jr , Research design and statistical analysis Routledge , Routledge , 2010$$y2010
000614260 999C5 $$1Bouhlel$$2Crossref$$9-- missing cx lookup --$$a10.1016/j.advengsoft.2019.03.005$$p102662 -$$tAdv. Eng. Software$$v135$$y2019
000614260 999C5 $$1Shannon$$2Crossref$$9-- missing cx lookup --$$a10.1109/JRPROC.1949.232969$$p10 -$$tProc. IRE$$v37$$y1949
000614260 999C5 $$1Farrow$$2Crossref$$9-- missing cx lookup --$$a10.1103/PhysRevB.84.134105$$p134105 -$$tPhys. Rev. B: Condens. Matter Mater. Phys.$$v84$$y2011
000614260 999C5 $$1Chen$$2Crossref$$9-- missing cx lookup --$$a10.1145/2939672.2939785$$p785 -$$uT.Chen and C.Guestrin , Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining , 2016 , vol. 22 , pp. 785–794$$v22$$y2016
000614260 999C5 $$1Li$$2Crossref$$9-- missing cx lookup --$$a10.48550/arXiv.1203.3491$$uP.Li , arXiv , 2012 , preprint, arXiv:1203.3491, 10.48550/arXiv.1203.3491$$y2012
000614260 999C5 $$1Nicolae$$2Crossref$$9-- missing cx lookup --$$a10.48550/arXiv.1807.01069$$uM.-I.Nicolae , M.Sinn , M. N.Tran , B.Buesser , A.Rawat , M.Wistuba , V.Zantedeschi , N.Baracaldo , B.Chen and H.Ludwig , arXiv , 2018 , preprint, arXiv:1807.01069, 10.48550/arXiv.1807.01069$$y2018
000614260 999C5 $$1Pearce$$2Crossref$$9-- missing cx lookup --$$a10.48550/arXiv.2106.04972$$uT.Pearce , A.Brintrup and J.Zhu , arXiv , 2021 , preprint, arXiv:2106.04972, 10.48550/arXiv.2106.04972$$y2021
000614260 999C5 $$1Chernick$$2Crossref$$tBootstrap methods: A guide for practitioners and researchers$$uM. R.Chernick , Bootstrap methods: A guide for practitioners and researchers , John Wiley & Sons , 2011$$y2011
000614260 999C5 $$1Efron$$2Crossref$$9-- missing cx lookup --$$a10.1201/9780429246593$$uB.Efron and R. J.Tibshirani , An introduction to the bootstrap , CRC press , 1994$$y1994
000614260 999C5 $$1Dietterich$$2Crossref$$tMultiple Classifier Systems. MCS 2000. Lecture Notes in Computer Science$$uT. G.Dietterich , in Multiple Classifier Systems. MCS 2000. Lecture Notes in Computer Science , 2000 , vol. 1857 , pp. 1–15$$y2000
000614260 999C5 $$1Prince$$2Crossref$$9-- missing cx lookup --$$a10.1063/1.2000504$$pS68 -$$tJ. Appl. Phys.$$v32$$y2009
000614260 999C5 $$1Jensen$$2Crossref$$9-- missing cx lookup --$$a10.1021/nn5044096$$p10704 -$$tACS Nano$$v8$$y2014
000614260 999C5 $$1Yang$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273320013066$$p2 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v77$$y2021
000614260 999C5 $$1Rojas$$2Crossref$$9-- missing cx lookup --$$a10.1006/jssc.1994.1312$$p322 -$$tJ. Solid State Chem.$$v112$$y1994
000614260 999C5 $$1Kaplunnik$$2Crossref$$oKaplunnik 1977$$y1977
000614260 999C5 $$1Iliev$$2Crossref$$9-- missing cx lookup --$$a10.1103/PhysRevB.81.024302$$p024302 -$$tPhys. Rev. B: Condens. Matter Mater. Phys.$$v81$$y2010
000614260 999C5 $$1Zheng$$2Crossref$$9-- missing cx lookup --$$a10.1038/s41524-018-0067-x$$p12 -$$tnpj Comput. Mater.$$v4$$y2018
000614260 999C5 $$1Butler$$2Crossref$$oButler 2021$$y2021
000614260 999C5 $$1Park$$2Crossref$$9-- missing cx lookup --$$a10.1107/S205225251700714X$$p486 -$$tIUCrJ$$v4$$y2017
000614260 999C5 $$1Ziletti$$2Crossref$$9-- missing cx lookup --$$a10.1038/s41467-018-05169-6$$p2775 -$$tNat. Commun.$$v9$$y2018
000614260 999C5 $$1Chen$$2Crossref$$9-- missing cx lookup --$$a10.1063/5.0049111$$p031301 -$$tChem. Phys. Rev.$$v2$$y2021
000614260 999C5 $$1Suzuki$$2Crossref$$9-- missing cx lookup --$$a10.1038/s41598-019-56847-4$$p1 -$$tSci. Rep.$$v10$$y2020
000614260 999C5 $$1Kirkpatrick$$2Crossref$$9-- missing cx lookup --$$a10.1126/science.abj6511$$p1385 -$$tScience$$v374$$y2021
000614260 999C5 $$1Tao$$2Crossref$$9-- missing cx lookup --$$a10.1038/s41578-021-00337-5$$p701 -$$tNat. Rev. Mater.$$v6$$y2021
000614260 999C5 $$1Liu$$2Crossref$$9-- missing cx lookup --$$a10.1107/S160057672100265X$$p768 -$$tJ. Appl. Crystallogr.$$v54$$y2021
000614260 999C5 $$1Stanev$$2Crossref$$9-- missing cx lookup --$$a10.1038/s41524-018-0099-2$$p43 -$$tnpj Comput. Mater.$$v4$$y2018
000614260 999C5 $$1Thatcher$$2Crossref$$9-- missing cx lookup --$$a10.1107/S2053273322002522$$p242 -$$tActa Crystallogr., Sect. A: Found. Adv.$$v78$$y2022
000614260 999C5 $$1Tetef$$2Crossref$$9-- missing cx lookup --$$a10.1039/D1CP02903G$$p23586 -$$tPhys. Chem. Chem. Phys.$$v23$$y2021
000614260 999C5 $$1Chapman$$2Crossref$$9-- missing cx lookup --$$a10.1107/S1600576715016532$$p1619 -$$tJ. Appl. Crystallogr.$$v48$$y2015
000614260 999C5 $$1Wold$$2Crossref$$9-- missing cx lookup --$$a10.1016/0169-7439(87)80084-9$$p37 -$$tChemom. Intell. Lab. Syst.$$v2$$y1987