<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Diabetes</journal-id><journal-id journal-id-type="publisher-id">diabetes</journal-id><journal-id journal-id-type="index">23</journal-id><journal-title>JMIR Diabetes</journal-title><abbrev-journal-title>JMIR Diabetes</abbrev-journal-title><issn pub-type="epub">2371-4379</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e69142</article-id><article-id pub-id-type="doi">10.2196/69142</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Toward a Clinically Actionable, Electronic Health Record&#x2013;Based Machine Learning Model to Forecast 90-Day Change in Hemoglobin A<sub>1c</sub> in Youth With Type 1 Diabetes: Feasibility and Model Development Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Tallon</surname><given-names>Erin M</given-names></name><degrees>PhD, RN</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Williams</surname><given-names>David D</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Schweisberger</surname><given-names>Cintya</given-names></name><degrees>DO</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mullaney</surname><given-names>Colin</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lockee</surname><given-names>Brent</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ferro</surname><given-names>Diana</given-names></name><degrees>MSc, PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Vandervelden</surname><given-names>Craig A</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Barnes</surname><given-names>Mitchell S</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sarteau</surname><given-names>Angelica Cristello</given-names></name><degrees>MSPH, PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kahkoska</surname><given-names>Anna R</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Patton</surname><given-names>Susana R</given-names></name><degrees>PhD, CDCES</degrees><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mehta</surname><given-names>Sanjeev</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" 
rid="aff9">9</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>McDonough</surname><given-names>Ryan</given-names></name><degrees>DO</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff10">10</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lind</surname><given-names>Marcus</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff11">11</xref><xref ref-type="aff" rid="aff12">12</xref><xref ref-type="aff" rid="aff13">13</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>D'Avolio</surname><given-names>Leonard</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff14">14</xref><xref ref-type="aff" rid="aff15">15</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Clements</surname><given-names>Mark A</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Division of Pediatric Endocrinology and Diabetes, Children's Mercy Kansas City</institution><addr-line>2401 Gillham Road</addr-line><addr-line>Kansas City</addr-line><addr-line>MO</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Pediatrics, UMKC School of Medicine</institution><addr-line>Kansas City</addr-line><addr-line>MO</addr-line><country>United States</country></aff><aff id="aff3"><institution>Institute for Data Science and Informatics, University of Missouri-Columbia</institution><addr-line>Columbia</addr-line><addr-line>MO</addr-line><country>United States</country></aff><aff id="aff4"><institution>Division of Health Services and Outcomes Research, Children's Mercy Kansas City</institution><addr-line>Kansas City</addr-line><addr-line>MO</addr-line><country>United 
States</country></aff><aff id="aff5"><institution>Blue Circle Health</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff6"><institution>Preventive and Predictive Medicine, IRCCS, Bambino Ges&#x00F9; Children's Hospital</institution><addr-line>Rome</addr-line><country>Italy</country></aff><aff id="aff7"><institution>Department of Nutrition, University of North Carolina at Chapel Hill</institution><addr-line>Chapel Hill</addr-line><addr-line>NC</addr-line><country>United States</country></aff><aff id="aff8"><institution>Center for Healthcare Delivery Science, Nemours Children's Health</institution><addr-line>Jacksonville</addr-line><addr-line>FL</addr-line><country>United States</country></aff><aff id="aff9"><institution>Joslin Diabetes Center</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff10"><institution>Department of Pediatrics, Arkansas Children's Northwest</institution><addr-line>Springdale</addr-line><addr-line>AR</addr-line><country>United States</country></aff><aff id="aff11"><institution>Department of Medicine, NU-Hospital Group</institution><addr-line>Uddevalla</addr-line><country>Sweden</country></aff><aff id="aff12"><institution>Department of Molecular and Clinical Medicine, University of Gothenburg</institution><addr-line>Gothenburg</addr-line><country>Sweden</country></aff><aff id="aff13"><institution>Department of Medicine, Sahlgrenska University Hospital</institution><addr-line>Gothenburg</addr-line><country>Sweden</country></aff><aff id="aff14"><institution>Harvard Medical School</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff15"><institution>Mass General Brigham</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name 
name-style="western"><surname>Quinlan</surname><given-names>Leo</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Gao</surname><given-names>Rui</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Mohanadas</surname><given-names>Sadhasivam</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Adhikari</surname><given-names>Soumya</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Erin M Tallon, PhD, RN, Division of Pediatric Endocrinology and Diabetes, Children's Mercy Kansas City, 2401 Gillham Road, Kansas City, MO, United States, 1 8166014023; <email>etallon@cmh.edu</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>25</day><month>9</month><year>2025</year></pub-date><volume>10</volume><elocation-id>e69142</elocation-id><history><date date-type="received"><day>10</day><month>01</month><year>2025</year></date><date date-type="rev-recd"><day>01</day><month>07</month><year>2025</year></date><date date-type="accepted"><day>23</day><month>07</month><year>2025</year></date></history><copyright-statement>&#x00A9; Erin M Tallon, David D Williams, Cintya Schweisberger, Colin Mullaney, Brent Lockee, Diana Ferro, Craig A Vandervelden, Mitchell S Barnes, Angelica Cristello Sarteau, Anna R Kahkoska, Susana R Patton, Sanjeev Mehta, Ryan McDonough, Marcus Lind, Leonard D'Avolio, Mark A Clements. Originally published in JMIR Diabetes (<ext-link ext-link-type="uri" xlink:href="https://diabetes.jmir.org">https://diabetes.jmir.org</ext-link>), 25.9.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Diabetes, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://diabetes.jmir.org/">https://diabetes.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://diabetes.jmir.org/2025/1/e69142"/><abstract><sec><title>Background</title><p>Clinicians currently lack an effective means for identifying youth with type 1 diabetes (T1D) who are at risk for experiencing glycemic deterioration between diabetes clinic visits. As a result, their ability to identify youth who may optimally benefit from targeted interventions designed to address rising glycemic levels is limited. Although electronic health records (EHR)&#x2013;based risk predictions have been used to forecast health outcomes in T1D, no study has investigated the potential for using EHR data to identify youth with T1D who will experience a clinically significant rise in glycated hemoglobin (HbA<sub>1c</sub>) &#x2265;0.3% (approximately 3 mmol/mol) between diabetes clinic visits.</p></sec><sec><title>Objective</title><p>We aimed to evaluate the feasibility of using routinely collected EHR data to develop a machine learning model to predict 90-day unit-change in HbA<sub>1c</sub> (in % units) in youth (aged 9&#x2010;18 y) with T1D. 
We assessed our model&#x2019;s ability to augment clinical decision-making by identifying a percent change cut point that optimized identification of youth who would experience a clinically significant rise in HbA<sub>1c</sub>.</p></sec><sec sec-type="methods"><title>Methods</title><p>From a cohort of 2757 youth with T1D who received care from a network of pediatric diabetes clinics in the Midwestern United States (January 2012-August 2017), we identified 1743 youth with 9643 HbA<sub>1c</sub> observation windows (ie, 2 HbA<sub>1c</sub> measurements separated by 70&#x2010;110 d, approximating the 90-day time interval between routine diabetes clinic visits). We used up to 5 years of youths&#x2019; longitudinal EHR data to transform 17,466 features (demographics, laboratory results, vital signs, anthropometric measures, medications, diagnosis codes, procedure codes, and free-text data) for model training. We performed 3-fold cross-validation to train random forest regression models to predict 90-day unit-change in HbA<sub>1c</sub>(%).</p></sec><sec sec-type="results"><title>Results</title><p>Across all 3 folds of our cross-validation model, the average root-mean-square error was 0.88 (95% CI 0.85&#x2010;0.90). Predicted HbA<sub>1c</sub>(%) strongly correlated with true HbA<sub>1c</sub>(%) (<italic>r</italic>=0.79; 95% CI 0.78&#x2010;0.80). The top 10 features impacting model predictions included postal code, various metrics related to HbA<sub>1c</sub>, and the frequency of a diagnosis code indicating difficulty with treatment engagement. At a clinically significant percent rise threshold of &#x2265;0.3% (approximately 3 mmol/mol), our model&#x2019;s positive predictive value was 60.3%, indicating a 1.5-fold enrichment (relative to the observed frequency that youth experienced this outcome [3928/9643, 40.7%]). 
Model sensitivity and positive predictive value improved when thresholds for clinical significance included smaller changes in HbA<sub>1c</sub>, whereas specificity and negative predictive value improved when thresholds required larger changes in HbA<sub>1c</sub>.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Routinely collected EHR data can be used to create an ML model for predicting unit-change in HbA<sub>1c</sub> between diabetes clinic visits among youth with T1D. Future work will focus on optimizing model performance and validating the model in additional cohorts and in other diabetes clinics.</p></sec></abstract><kwd-group><kwd>adolescent</kwd><kwd>AI, artificial intelligence</kwd><kwd>clinical decision support</kwd><kwd>EHR, electronic health records</kwd><kwd>glycemic control</kwd><kwd>HbA1c, hemoglobin A1c</kwd><kwd>machine learning</kwd><kwd>pediatric</kwd><kwd>population health</kwd><kwd>prediction</kwd><kwd>real-world data</kwd><kwd>T1D, type 1 diabetes</kwd><kwd>youth</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Type 1 diabetes (T1D), an immune-mediated chronic disease that affects more than 1 in 300 youth in the United States, is characterized by significant to near-total loss of endogenous insulin production [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. 
Given insulin&#x2019;s critical role in maintaining glucose homeostasis, the most immediate and pervasive downstream effect of insulin deficiency is persistent, life-threatening hyperglycemia that must be identified through frequent glucose monitoring and managed with lifelong administration of exogenous insulin [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>Youth with T1D attend routine (often quarterly) diabetes clinic visits where clinicians use glycated hemoglobin (HbA<sub>1c</sub>) testing to assess glycemic status [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Considered the gold standard for monitoring long-term glycemia in diabetes, HbA<sub>1c</sub> testing provides an objective measure of an individual&#x2019;s mean blood glucose during the previous 2&#x2010;3 months [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. To achieve glycemic goals, youth with T1D are increasingly being encouraged to adopt sophisticated diabetes technologies, such as hybrid closed-loop insulin pumps and continuous glucose monitoring (CGM) systems [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. 
Concurrent with the rising availability of these technologies and a strong research base linking HbA<sub>1c</sub> with the development of diabetes complications, the American Diabetes Association has incrementally lowered its recommended HbA<sub>1c</sub> goals for youth with diabetes [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>Despite increased adoption of advanced diabetes technologies over time, data from the T1D Exchange indicated that between 2010&#x2010;2012 and 2016&#x2010;2018, mean HbA<sub>1c</sub> in US youth with T1D rose from 7.8% (62 mmol/mol) to 8.4% (68 mmol/mol); and in 2016&#x2010;2018, only 16% (686/4346) of youth were meeting the American Diabetes Association&#x2019;s (then) recommended HbA<sub>1c</sub> goal of &#x003C;7.5% (&#x003C;58 mmol/mol) [<xref ref-type="bibr" rid="ref6">6</xref>]. A separate analysis of 2015&#x2010;2016 data indicated that fewer than 20% (1817/9685) of US youth with T1D younger than 18 years had an HbA<sub>1c</sub>&#x003C;7.5% (58 mmol/mol); and fewer than 10% (690/9685) of youth had an HbA<sub>1c</sub>&#x003C;7% (53 mmol/mol) [<xref ref-type="bibr" rid="ref9">9</xref>]. Previous research has shown that 1 in 5 youth with T1D experience an increasing HbA<sub>1c</sub> trajectory between the ages of 8 and 18 years [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Through a phenomenon known as &#x201C;metabolic memory,&#x201D; periods of hyperglycemia are known to increase risk for diabetes-related microvascular and macrovascular complications for &#x003E;10 years following initial exposure [<xref ref-type="bibr" rid="ref11">11</xref>]. A similar&#x2014;but beneficial&#x2014;legacy effect is observed in individuals with T1D who are exposed to near-normal glycemia and later experience more favorable long-term diabetes outcomes, even when glycemic levels later rise [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. 
These findings point to a critical need to optimize the early identification of youth who are candidates for targeted interventions to improve deteriorating glycemia.</p><p>The increasing availability of real-world clinical data housed in electronic health records (EHR) is generating opportunities to investigate population-level health outcomes, develop classification and risk prediction models to augment clinical decision-making, and accelerate diagnostic and therapeutic discovery [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. Machine learning (ML) has been used to meaningfully advance understanding of numerous clinical outcomes in individuals with diabetes [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], and EHR-based risk predictions have been leveraged to generate insights across the health-disease spectrum, including T1D [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>].</p><p>Given the multifactorial etiology of rising glycemic levels in youth with T1D, it remains difficult to identify youth who are at the highest risk of experiencing increased HbA<sub>1c</sub> between routine diabetes clinic visits. To date, no study has investigated the feasibility of or potential for using EHR data to develop a predictive model to identify youth with T1D who will experience a clinically significant rise in HbA<sub>1c</sub> between clinic visits. Such a model could augment clinical decision-making and facilitate initiation of interventions that increase behaviors known to improve glycemia in high-risk youth.</p></sec><sec id="s1-2"><title>Objective</title><p>We sought to evaluate the feasibility of using ML to identify youth (aged 9&#x2010;18 y) with T1D who were candidates for behavioral and care delivery interventions designed to reduce or prevent a predicted rise in HbA<sub>1c</sub>. 
To do so, we used routinely collected EHR data to develop an interpretable and clinically actionable ML model to forecast unit-change (ie, increase or decrease, in % units) in HbA<sub>1c</sub> in 90 days. We then evaluated the ability of our model to augment clinical decision-making by identifying a percent-change cut point that optimized identification of youth who experienced a clinically significant rise in HbA<sub>1c</sub> at their subsequent diabetes clinic encounter.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>We applied the random forest (RF) regression algorithm to longitudinal EHR data to develop a model to forecast 90-day unit-change in HbA<sub>1c</sub> (in % units). We used RF due to its utility for constructing accurate, noise-resilient ML models from high-dimensional data [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. To evaluate our model&#x2019;s ability to identify youth who, based on predicted rise in HbA<sub>1c</sub>, were true candidates for intervention, we evaluated the sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) of predicted versus actual change in HbA<sub>1c</sub> at several cut points: &#x2265;0.3%, &#x2265;0.4%, &#x2265;0.5%, and &#x2265;0.6% (approximately 3 mmol/mol, 4 mmol/mol, 5 mmol/mol, and 7 mmol/mol, respectively).</p></sec><sec id="s2-2"><title>Source Data and Study Cohort</title><p>Using data extracted from Oracle Health EHR (formerly Cerner Millennium Electronic Medical Record System; Nashville, Tennessee) [<xref ref-type="bibr" rid="ref24">24</xref>], we used diagnosis code and laboratory data to identify a cohort of 2757 youth with T1D who received care from a network of pediatric diabetes clinics in the Midwestern United States between January 2012 and August 2017. 
Criteria used to identify this T1D cohort are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-3"><title>HbA<sub>1c</sub> Measurements and Observation Windows</title><p>For youth with T1D, we identified health encounters that were associated with HbA<sub>1c</sub> measurements (ie, laboratory and point-of-care HbA<sub>1c</sub> measurements) and HbA<sub>1c</sub> observation windows that met inclusion criteria. Each HbA<sub>1c</sub> observation window comprised 2 documented HbA<sub>1c</sub> measurements (from a single individual) separated by a time interval of 70&#x2010;110 days. The 70&#x2010; to 110-day time interval was selected to approximate the 3-month (ie, 90-day) time interval between regularly scheduled diabetes clinic visits.</p><p>Certain encounters with HbA<sub>1c</sub> data were excluded from consideration and therefore not included in any HbA<sub>1c</sub> observation windows. HbA<sub>1c</sub> values documented at or shortly after T1D diagnosis tend to be more extreme than those documented at subsequent time points (ie, after an individual with T1D begins receiving regular insulin injections) [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. As such, each youth&#x2019;s first-documented encounter with an HbA<sub>1c</sub> value was excluded under the assumption that a youth&#x2019;s first HbA<sub>1c</sub> measurement may have been obtained at the time of T1D diagnosis. 
We also excluded data from encounters where youth were &#x003C;9 years old, as a clinically significant rise in HbA<sub>1c</sub> is less common in this age group [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>We excluded observation windows associated with HbA<sub>1c</sub> measurements that were separated by &#x003C;70 days or &#x003E;110 days, as well as those where the first encounter for a given HbA<sub>1c</sub> observation window (ie, the index encounter) was associated with an HbA<sub>1c</sub> of &#x003E;12% (&#x003E;108 mmol/mol). The latter exclusion criterion was used because individuals with an HbA<sub>1c</sub> of &#x003E;12% (&#x003E;108 mmol/mol) were already considered ideal candidates for intervention. Encounter-level data from all HbA<sub>1c</sub> observation windows that met inclusion criteria were included in our final dataset, which could include data from multiple HbA<sub>1c</sub> observation windows per individual.</p></sec><sec id="s2-4"><title>Outcome Definition</title><p>The forecasted outcome was unit-change in HbA<sub>1c</sub> (in % units) at the end of 90 days. After predicting each youth&#x2019;s percent change in HbA<sub>1c</sub> in 90 days (ie, at the time of the follow-up encounter), we used various thresholds to determine an HbA<sub>1c</sub> percent rise cut point that optimized identification of individuals who were true candidates for intervention at the time of their index encounter: &#x2265;0.3%, &#x2265;0.4%, &#x2265;0.5%, and &#x2265;0.6% (approximately 3 mmol/mol, 4 mmol/mol, 5 mmol/mol, and 7 mmol/mol, respectively). 
We considered these cut points to be clinically relevant and actionable, given that a long-term decrease of &#x2265;0.3% (3 mmol/mol) in HbA<sub>1c</sub> is associated with reduced risk of long-term diabetes complications [<xref ref-type="bibr" rid="ref28">28</xref>].</p></sec><sec id="s2-5"><title>Data Extraction</title><p>We used SQL queries to comprehensively extract up to approximately 5 years (January 2012-August 2017) of structured and unstructured EHR data for each youth with index and follow-up encounter data for at least 1 qualifying HbA<sub>1c</sub> observation window. These data included demographics, laboratory results, vital signs, anthropometric measures, encounter locations, medications, diagnosis codes, procedure codes, structured clinical vocabulary codes, and free-text data from diabetes- and non&#x2013;diabetes-related clinical notes, messages, and reports.</p><p>Demographic data included sex (female, male), age, ethnicity (non-Hispanic, Hispanic), race (White, Black or African American, Asian, American Indian or Alaska Native, Native Hawaiian or Pacific Islander, and other), primary language (eg, English or Spanish), health plan type, and postal code (3- and 4-digit postal code prefixes). Additional extracted data included up to approximately 5 years of all available laboratory test results, clinical event and observation data, vital signs (heart rate, respiratory rate, oxygen saturation, and blood pressure), anthropometric measures (weight, height, and BMI), and medications (mapped to standard generic drug names [<xref ref-type="bibr" rid="ref29">29</xref>]). 
We also extracted diagnosis codes (ie, <italic>ICD-9</italic> [<italic>International Classification of Diseases, Ninth Revision</italic>], <italic>ICD-10</italic> [<italic>International Statistical Classification of Diseases, Tenth Revision</italic>], and Systematized Nomenclature of Medicine Clinical Terms [SNOMED CT] codes), procedure codes (ie, Current Procedural Terminology [CPT] codes), and other structured clinical vocabulary codes (ie, SNOMED CT).</p><p>We chose not to include data generated by diabetes devices (eg, automated insulin delivery and CGM systems). Early on, we observed that HbA<sub>1c</sub> was easiest to predict in youth who used diabetes devices that generate diabetes data (eg, glucose levels) in real time. However, since most diabetes centers do not have broad or ready access to device data in near-real time, we sought to evaluate the potential of using only EHR data to predict HbA<sub>1c</sub>.</p></sec><sec id="s2-6"><title>Feature Engineering</title><p>We engineered features using data documented during all available historical encounters, as well as during HbA<sub>1c</sub> observation window index and follow-up encounters. Processes used to transform variables into features for ML varied by data type. In all, our feature engineering processes generated 17,466 input features for model fitting.</p></sec><sec id="s2-7"><title>Numeric Variables</title><p>For numeric variables (eg, laboratory results, weight, and vital signs), we created features by calculating summary metrics (ie, mean, slope, and SD). In general, we created 2 sets of features for each numeric variable, based on proximity of the measurements to the HbA<sub>1c</sub> observation window&#x2019;s index encounter. One set of features was created using data documented during the 12 months preceding (and at) the index encounter. A second set was created using all available EHR data documented before (and at) the index encounter. 
For example, we created 2 features for mean HbA<sub>1c</sub>: one calculated using the previous 12 months of HbA<sub>1c</sub> data (up to and including the index encounter), and the other calculated using all available HbA<sub>1c</sub> data (up to and including the index encounter). Given the intrinsic insensitivity of RF to numerical outliers, we did not alter or drop outliers from the data. Once all numerical features were created, missing numerical values were imputed using the population median.</p><p>Each youth&#x2019;s diagnostic (ie, first) HbA<sub>1c</sub> result was included as a separate feature, as was the HbA<sub>1c</sub> result documented at the observation window&#x2019;s index encounter. Because research suggests that youth with T1D can be grouped into one of several HbA<sub>1c</sub> trajectory clusters [<xref ref-type="bibr" rid="ref10">10</xref>], we created an HbA<sub>1c</sub> trajectory feature by using k-means clustering [<xref ref-type="bibr" rid="ref30">30</xref>] to assign youth to 1 of 4 clusters based on their quarterly HbA<sub>1c</sub> measurements.</p></sec><sec id="s2-8"><title>Categorical Variables</title><p>We used data documented at the observation window&#x2019;s index encounter to create features from demographic data (eg, age, race, ethnicity, primary language, health plan type, and postal code). For each categorical demographic variable, we used the StringIndexer feature transformer to convert the categories associated with each variable into numeric indices, thus creating a single feature for each of these variables [<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>We used Clinical Classifications Software Refined (CCSR), developed by the Agency for Healthcare Research and Quality, to group <italic>ICD-10</italic> codes into meaningful categories [<xref ref-type="bibr" rid="ref32">32</xref>]. 
Thereafter, each CCSR category and each <italic>ICD-9</italic>, <italic>ICD-10</italic>, SNOMED CT, and CPT code was treated as a separate variable. We created 2 sets of features for each of these separate variables, based on how many times each had been assigned to the individual relative to the observation window&#x2019;s index encounter. One set of features was created by calculating the frequency that each had been assigned to the individual during the 12 months preceding (and at) the index encounter. The second set was created using all available EHR data documented before (and at) the index encounter. Absence of diagnosis, procedure, or structured clinical vocabulary codes was presumed to reflect true absence, rather than missingness, of these data variables.</p><p>Medication variables were similarly transformed into 2 sets of features based on how often each medication had been prescribed relative to the index encounter. One set of features was created by calculating the frequency that each medication had been prescribed to the individual during the 12 months preceding (and at) the index encounter. The second set was created using all available medication data documented before (and at) the index encounter. Encounter frequencies were similarly calculated and included as separate features. Absence of medication and encounter data was presumed to reflect true absence of these data.</p></sec><sec id="s2-9"><title>Natural Language Processing</title><p>We used term frequency&#x2013;inverse document frequency (TF-IDF) vectorization, a natural language processing technique, to process free-text data from clinical notes, messages, and reports. In TF-IDF vectorization, words (ie, tokens) are first converted into a matrix of token counts [<xref ref-type="bibr" rid="ref33">33</xref>]. 
The matrix is then transformed into a normalized TF-IDF representation that most heavily weights tokens that occur infrequently across the entire corpus of available text [<xref ref-type="bibr" rid="ref33">33</xref>]. As such, TF-IDF is used to assign the highest weight to words that have the most discriminating power. After ranking by weight, we constrained the total number of features generated via TF-IDF vectorization to 250 single-word terms and 250 two-word terms, each of which had to be present in at least 5 documents.</p></sec><sec id="s2-10"><title>Model Development and Evaluation</title><p>RF uses bootstrap aggregation and random feature sampling to independently train a series of uncorrelated decision tree regressors, known as &#x201C;weak learners&#x201D; [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. Predictions from this ensemble of weak learners are averaged to produce a single &#x201C;strong learner&#x201D; with improved prediction accuracy [<xref ref-type="bibr" rid="ref23">23</xref>]. Relative to many other ML methods, the RF algorithm presents several key advantages, including decreased risk of overfitting, straightforward calculation of the degree to which individual input features contribute to model predictions, and robustness to missing data [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>After randomly splitting the entire dataset into 3 nonoverlapping data subsets, we used 3-fold cross-validation to recursively fit RF regressors to 2 of the 3 subsets and then evaluate model performance on the third, held-out subset. We used 3-fold (rather than 5- or 10-fold) cross-validation due to the large number of HbA<sub>1c</sub> observation windows included in our analysis, as well as our desire to reduce variance in the estimated performance of our model. 
Hyperparameters used for model fitting are presented in <xref ref-type="table" rid="table1">Table 1</xref>. Model performance was evaluated by averaging the mean absolute error (MAE) and the root-mean-square error (RMSE)&#x2014;the SD of the residuals [<xref ref-type="bibr" rid="ref37">37</xref>]&#x2014;across all 3 cross-validation models.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Hyperparameter values used for random forest regressor model training. A complete list of hyperparameter keys accepted by the random forest regressor algorithm and definitions of each can be found on the web [<xref ref-type="bibr" rid="ref38">38</xref>]. Hyperparameters not listed below were set to default values.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Hyperparameter</td><td align="left" valign="bottom">Value used</td><td align="left" valign="bottom">Default value</td></tr></thead><tbody><tr><td align="left" valign="top">NumTrees</td><td align="left" valign="top">40</td><td align="left" valign="top">20</td></tr><tr><td align="left" valign="top">MaxDepth</td><td align="left" valign="top">7</td><td align="left" valign="top">5</td></tr><tr><td align="left" valign="top">MaxBins</td><td align="left" valign="top">128</td><td align="left" valign="top">32</td></tr><tr><td align="left" valign="top">MinInstancesPerNode</td><td align="left" valign="top">8</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">FeatureSubsetStrategy</td><td align="left" valign="top">&#x201C;onethird&#x201D;</td><td align="left" valign="top">&#x201C;onethird&#x201D;</td></tr><tr><td align="left" valign="top">Impurity</td><td align="left" valign="top">&#x201C;variance&#x201D;</td><td align="left" valign="top">&#x201C;variance&#x201D;</td></tr><tr><td align="left" valign="top">MinInfoGain</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" 
valign="top">MinWeightFractionPerNode</td><td align="left" valign="top">0.0</td><td align="left" valign="top">0.0</td></tr><tr><td align="left" valign="top">SubsamplingRate</td><td align="left" valign="top">1.0</td><td align="left" valign="top">1.0</td></tr></tbody></table></table-wrap><p>Decision tree regressors are grown by recursively splitting on features to maximize impurity reduction [<xref ref-type="bibr" rid="ref39">39</xref>]. Feature splits that reduce impurity by maximally reducing variance are considered important; thus, the features that are split to maximize reduction in variance are also deemed important [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. We evaluated feature importance by calculating and ranking the mean reduction in variance associated with only those features that were used by all 3 of our cross-validation models to forecast HbA<sub>1c</sub>.</p><p>We used Python (version 3) and Scala (version 2; Programming Methods Laboratory at &#x00C9;cole Polytechnique F&#x00E9;d&#x00E9;rale de Lausanne) to clean and transform the data. ML analyses were conducted using the Apache Spark MLlib (version 2) ML library [<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec><sec id="s2-11"><title>Statistical Analysis</title><p>Pearson <italic>r</italic> correlations were used to assess the strength and direction of the relationship between actual and predicted HbA<sub>1c</sub> values. 
We also used sensitivity, specificity, PPV, and NPV as clinical performance metrics to aid in identifying a predicted HbA<sub>1c</sub> percent rise threshold that would facilitate optimal capture of youth who would experience a clinically significant rise in HbA<sub>1c</sub> in 90 days.</p><p>Summary statistics, correlations, RMSE, MAE, and sensitivity, specificity, PPV, and NPV metrics were assessed using Stata/SE (Stata standard edition) software (version 18.5; StataCorp) [<xref ref-type="bibr" rid="ref42">42</xref>].</p></sec><sec id="s2-12"><title>Ethical Considerations</title><p>Clinical and model output data were collected and coded in an institutional review board&#x2013;approved research data repository at Children&#x2019;s Mercy Kansas City (Kansas City, Missouri; IRB #11120355) that met the requirements for a waiver of written informed consent as outlined in 45 CFR 46.116.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>Out of 2757 youth with T1D, 1743 youth (63.2%) had one or more HbA<sub>1c</sub> observation windows (n=9643) that met inclusion criteria (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flowchart depicting inclusion and exclusion criteria for the study cohort and for glycated hemoglobin observation windows. Abbreviations: HbA<sub>1c</sub>: glycated hemoglobin; T1D: type 1 diabetes.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="diabetes_v10i1e69142_fig01.png"/></fig><p>Characteristics of the entire cohort that met inclusion criteria are summarized in <xref ref-type="table" rid="table2">Table 2</xref>. 
The observed frequencies with which youth experienced a rise in HbA<sub>1c</sub> that met or exceeded each percent change cut point (&#x2265;0.3%, &#x2265;0.4%, &#x2265;0.5%, and &#x2265;0.6% [approximately 3 mmol/mol, 4 mmol/mol, 5 mmol/mol, 7 mmol/mol]) were 40.7%, 35.6%, 30.8%, and 26.5%, respectively. Characteristics of observations included in each nonoverlapping K-fold are summarized in <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Demographic and clinical characteristics of 1743 youth with glycated hemoglobin observation windows that met inclusion criteria.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Demographic and clinical characteristics</td><td align="left" valign="bottom">All HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> observation windows (n=9643)</td><td align="left" valign="bottom">Index encounter of each youth&#x2019;s first HbA<sub>1c</sub> observation window (n=1743)</td></tr></thead><tbody><tr><td align="left" valign="top">Age (y), mean (SD)</td><td align="left" valign="top">13.8 (2.6)</td><td align="left" valign="top">12.9 (2.7)</td></tr><tr><td align="left" valign="top">Sex, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Female</td><td align="left" valign="top">4599 (47.7)</td><td align="left" valign="top">844 (48.4)</td></tr><tr><td align="left" valign="top">&#x2003;Male</td><td align="left" valign="top">5044 (52.3)</td><td align="left" valign="top">899 (51.6)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">Race, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;White</td><td align="left" valign="top">8196 
(85)</td><td align="left" valign="top">1449 (83.1)</td></tr><tr><td align="left" valign="top">&#x2003;Black or African American</td><td align="left" valign="top">616 (6.4)</td><td align="left" valign="top">133 (7.6)</td></tr><tr><td align="left" valign="top">&#x2003;Asian</td><td align="left" valign="top">53 (0.5)</td><td align="left" valign="top">12 (0.7)</td></tr><tr><td align="left" valign="top">&#x2003;American Indian or Alaska Native</td><td align="left" valign="top">42 (0.4)</td><td align="left" valign="top">8 (0.5)</td></tr><tr><td align="left" valign="top">&#x2003;Native Hawaiian or Pacific Islander</td><td align="left" valign="top">8 (0.1)</td><td align="left" valign="top">3 (0.2)</td></tr><tr><td align="left" valign="top">&#x2003;Other</td><td align="left" valign="top">63 (0.7)</td><td align="left" valign="top">10 (0.6)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown</td><td align="left" valign="top">665 (6.9)</td><td align="left" valign="top">128 (7.3)</td></tr><tr><td align="left" valign="top">Ethnicity, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Non-Hispanic or non-Latino</td><td align="left" valign="top">8978 (93.1)</td><td align="left" valign="top">1620 (93.0)</td></tr><tr><td align="left" valign="top">&#x2003;Hispanic or Latino</td><td align="left" valign="top">656 (6.8)</td><td align="left" valign="top">121 (6.9)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown</td><td align="left" valign="top">9 (0.1)</td><td align="left" valign="top">2 (0.1)</td></tr><tr><td align="left" valign="top">HbA<sub>1c</sub> at index encounter (%), mean (SD)</td><td align="left" valign="top">8.6 (1.3)</td><td align="left" valign="top">8.5 (1.5)</td></tr><tr><td align="left" valign="top">HbA<sub>1c</sub> at index encounter (mmol/mol), mean (SD)</td><td align="left" valign="top">70 (14.2)</td><td align="left" valign="top">69 (16.4)</td></tr><tr><td align="left" 
valign="top">Change in HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> (%), median (IQR)</td><td align="left" valign="top">0.1 (&#x2013;0.4 to 0.6)</td><td align="left" valign="top">0.1 (&#x2013;0.4 to 0.7)</td></tr><tr><td align="left" valign="top">Change in HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> (mmol/mol), median (IQR)</td><td align="left" valign="top">1 (&#x2013;4 to 7)</td><td align="left" valign="top">1 (&#x2013;4 to 8)</td></tr><tr><td align="left" valign="top">HbA<sub>1c</sub> increase, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;&#x2265;0.3%</td><td align="left" valign="top">3928 (40.7)</td><td align="left" valign="top">763 (43.8)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2265;0.4%</td><td align="left" valign="top">3435 (35.6)</td><td align="left" valign="top">662 (38)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2265;0.5%</td><td align="left" valign="top">2966 (30.8)</td><td align="left" valign="top">580 (33.3)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2265;0.6%</td><td align="left" valign="top">2552 (26.5)</td><td align="left" valign="top">498 (28.6)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>HbA<sub>1c</sub>: glycated hemoglobin.</p></fn><fn id="table2fn2"><p><sup>b</sup>Change in HbA<sub>1c</sub>: (HbA<sub>1c</sub> at the observation window&#x2019;s follow-up encounter)&#x2013;(HbA<sub>1c</sub> at the observation window&#x2019;s index encounter).</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Demographic and clinical characteristics of youth with glycated hemoglobin observation windows included in each K-fold subset.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Demographic and clinical characteristics</td><td align="left" 
valign="bottom">HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> observation windows: fold 1 (n=3151)</td><td align="left" valign="bottom">HbA<sub>1c</sub> observation windows: fold 2 (n=3129)</td><td align="left" valign="bottom">HbA<sub>1c</sub> observation windows: fold 3 (n=3363)</td></tr></thead><tbody><tr><td align="left" valign="top">Youth, n (%)</td><td align="left" valign="top">1291 (41.0)</td><td align="left" valign="top">1288 (41.2)</td><td align="left" valign="top">1381 (41.1)</td></tr><tr><td align="left" valign="top">Age (y), mean (SD)</td><td align="left" valign="top">13.9 (2.6)</td><td align="left" valign="top">13.8 (2.6)</td><td align="left" valign="top">13.8 (2.6)</td></tr><tr><td align="left" valign="top">Sex, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Female</td><td align="left" valign="top">1534 (48.7)</td><td align="left" valign="top">1488 (47.6)</td><td align="left" valign="top">1577 (46.9)</td></tr><tr><td align="left" valign="top">&#x2003;Male</td><td align="left" valign="top">1617 (51.3)</td><td align="left" valign="top">1641 (52.4)</td><td align="left" valign="top">1786 (53.1)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top">Race, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;White</td><td align="left" valign="top">2690 (85.4)</td><td align="left" valign="top">2658 (85.0)</td><td align="left" valign="top">2848 (84.7)</td></tr><tr><td align="left" valign="top">&#x2003;Black or African American</td><td align="left" valign="top">174 (5.6)</td><td align="left" valign="top">206 (6.6)</td><td align="left" valign="top">236 
(7.0)</td></tr><tr><td align="left" valign="top">&#x2003;Asian</td><td align="left" valign="top">14 (0.4)</td><td align="left" valign="top">16 (0.5)</td><td align="left" valign="top">23 (0.7)</td></tr><tr><td align="left" valign="top">&#x2003;American Indian or Alaska Native</td><td align="left" valign="top">17 (0.5)</td><td align="left" valign="top">13 (0.4)</td><td align="left" valign="top">12 (0.4)</td></tr><tr><td align="left" valign="top">&#x2003;Native Hawaiian or Pacific Islander</td><td align="left" valign="top">3 (0.1)</td><td align="left" valign="top">3 (0.1)</td><td align="left" valign="top">2 (0.1)</td></tr><tr><td align="left" valign="top">&#x2003;Other</td><td align="left" valign="top">17 (0.5)</td><td align="left" valign="top">21 (0.6)</td><td align="left" valign="top">25 (0.7)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown</td><td align="left" valign="top">236 (7.5)</td><td align="left" valign="top">212 (6.8)</td><td align="left" valign="top">217 (6.4)</td></tr><tr><td align="left" valign="top">Ethnicity, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Non-Hispanic or non-Latino</td><td align="left" valign="top">2911 (92.4)</td><td align="left" valign="top">2928 (93.6)</td><td align="left" valign="top">3139 (93.3)</td></tr><tr><td align="left" valign="top">&#x2003;Hispanic or Latino</td><td align="left" valign="top">236 (7.5)</td><td align="left" valign="top">197 (6.3)</td><td align="left" valign="top">223 (6.6)</td></tr><tr><td align="left" valign="top">&#x2003;Unknown</td><td align="left" valign="top">4 (0.1)</td><td align="left" valign="top">4 (0.1)</td><td align="left" valign="top">1 (0.1)</td></tr><tr><td align="left" valign="top">HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> at index encounter (%), mean (SD)</td><td align="left" valign="top">8.6 (1.3)</td><td align="left" valign="top">8.6 
(1.3)</td><td align="left" valign="top">8.6 (1.3)</td></tr><tr><td align="left" valign="top">HbA<sub>1c</sub> at index encounter (mmol/mol), mean (SD)</td><td align="left" valign="top">70 (14)</td><td align="left" valign="top">70 (14)</td><td align="left" valign="top">70 (14)</td></tr><tr><td align="left" valign="top">HbA<sub>1c</sub> increase &#x2265;0.3%, n (%)</td><td align="left" valign="top">1255 (39.8)</td><td align="left" valign="top">1293 (41.3)</td><td align="left" valign="top">1380 (41)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>HbA<sub>1c</sub>: glycated hemoglobin.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Model Performance</title><p>Across all 3 folds of our cross-validation model, average RMSE was 0.88 (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Thus, in 68% (6557/9643) of cases (representing one SD), our predictions were within &#x00B1;0.88% (95% CI 0.85&#x2010;0.90) of the true percent change in HbA<sub>1c</sub>. The average MAE across all 3 folds was 0.64 (95% CI 0.63&#x2010;0.65). Predicted HbA<sub>1c</sub>(%) strongly correlated with true HbA<sub>1c</sub>(%; <italic>r</italic>=0.79; 95% CI 0.78&#x2010;0.80).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Distribution of the prediction error (ie, residuals) across all 3 cross-validation K-folds. Root-mean-square error is equal to the SD of the prediction error. 
RMSE: root-mean-square error.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="diabetes_v10i1e69142_fig02.png"/></fig></sec><sec id="s3-3"><title>Feature Importance</title><p>Across all 3 folds of our cross-validation model, the top 10 features identified as having the greatest impact on model predictions included postal code, various metrics related to HbA<sub>1c</sub>, and the number of times that the individual had been assigned a diagnosis code indicating difficulty with treatment engagement (<xref ref-type="fig" rid="figure3">Figure 3</xref>). The top 30 most important features used to predict percent change in HbA<sub>1c</sub> are in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Top 10 most important features for predicting 90-day percent change in glycated hemoglobin, assessed via gain-based feature importance. In random forest regression, gain is a feature importance measure that reflects, for a given feature, the mean increase in node purity (ie, mean reduction in variance) that the feature contributes across all splits in which it is used. Z91.19 is a diagnosis code from the <italic>ICD-10</italic> (<italic>International Classification of Diseases, Tenth Revision</italic>), that is used to code for nonadherence to, or noncompliance with, medical treatment. Dx: diagnosis; HbA<sub>1c</sub>: hemoglobin A<sub>1c</sub>.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="diabetes_v10i1e69142_fig03.png"/></fig></sec><sec id="s3-4"><title>Percent Change Cut Points</title><p>Our cross-validation model&#x2019;s ability to accurately predict change in HbA<sub>1c</sub> at various percent change cut points is illustrated in <xref ref-type="table" rid="table4">Table 4</xref>. 
At each percent change cut point (&#x2265;0.3%, &#x2265;0.4%, &#x2265;0.5%, and &#x2265;0.6% [approximately 3 mmol/mol, 4 mmol/mol, 5 mmol/mol, 7 mmol/mol]), PPV was 60.3%, 56.4%, 52.7%, and 53.1%, respectively, indicating an approximately 1.5- to 2-fold enrichment (relative to the observed frequency of each outcome [<xref ref-type="table" rid="table2">Table 2</xref>]) for identifying youth who would experience a clinically significant rise in HbA<sub>1c</sub>. Sensitivity and PPV improved when predictions involved smaller changes in HbA<sub>1c</sub>, whereas specificity and NPV improved when predictions involved larger changes in HbA<sub>1c</sub>. Sensitivity, specificity, PPV, and NPV metrics for each K-fold are in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Sensitivity, specificity, positive predictive value, and negative predictive value of predicted versus true percent change in HbA<sub>1c</sub> across all 3 cross-validation K-folds.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model metrics at each percent change cut point</td><td align="left" valign="bottom">Estimate, % (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">Predicted HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> % change: &#x2265;0.3%</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Sensitivity (True HbA<sub>1c</sub>% change: &#x2265;0.3%)</td><td align="left" valign="top">28.7 (27.3-30.2)</td></tr><tr><td align="left" valign="top">&#x2003;Specificity (True HbA<sub>1c</sub>% change: &#x2265;0.3%)</td><td align="left" valign="top">87 (86.1-87.9)</td></tr><tr><td align="left" valign="top">&#x2003;PPV<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> (True HbA<sub>1c</sub>% change: &#x2265;0.3%)</td><td align="left" valign="top">60.3 
(58.1-62.5)</td></tr><tr><td align="left" valign="top">&#x2003;NPV<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> (True HbA<sub>1c</sub>% change: &#x2265;0.3%)</td><td align="left" valign="top">64 (62.9-65)</td></tr><tr><td align="left" valign="top">Predicted HbA<sub>1c</sub>% change: &#x2265;0.4%</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Sensitivity (True HbA<sub>1c</sub>% change: &#x2265;0.4%)</td><td align="left" valign="top">17.4 (16.1-18.7)</td></tr><tr><td align="left" valign="top">&#x2003;Specificity (True HbA<sub>1c</sub>% change: &#x2265;0.4%)</td><td align="left" valign="top">92.6 (91.9-93.2)</td></tr><tr><td align="left" valign="top">&#x2003;PPV (True HbA<sub>1c</sub>% change: &#x2265;0.4%)</td><td align="left" valign="top">56.4 (53.3-59.4)</td></tr><tr><td align="left" valign="top">&#x2003;NPV (True HbA<sub>1c</sub>% change: &#x2265;0.4%)</td><td align="left" valign="top">66.9 (65.9-67.9)</td></tr><tr><td align="left" valign="top">Predicted HbA<sub>1c</sub>% change: &#x2265;0.5%</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Sensitivity (True HbA<sub>1c</sub>% change: &#x2265;0.5%)</td><td align="left" valign="top">10 (8.9-11.1)</td></tr><tr><td align="left" valign="top">&#x2003;Specificity (True HbA<sub>1c</sub>% change: &#x2265;0.5%)</td><td align="left" valign="top">96 (95.5-96.5)</td></tr><tr><td align="left" valign="top">&#x2003;PPV (True HbA<sub>1c</sub>% change: &#x2265;0.5%)</td><td align="left" valign="top">52.7 (48.4-56.9)</td></tr><tr><td align="left" valign="top">&#x2003;NPV (True HbA<sub>1c</sub>% change: &#x2265;0.5%)</td><td align="left" valign="top">70.6 (69.6-71.5)</td></tr><tr><td align="left" valign="top">Predicted HbA<sub>1c</sub>% change: &#x2265;0.6%</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">&#x2003;Sensitivity (True HbA<sub>1c</sub>% change: &#x2265;0.6%)</td><td align="left" valign="top">6.1 
(5.2-7.1)</td></tr><tr><td align="left" valign="top">&#x2003;Specificity (True HbA<sub>1c</sub>% change: &#x2265;0.6%)</td><td align="left" valign="top">98.1 (97.7-98.4)</td></tr><tr><td align="left" valign="top">&#x2003;PPV (True HbA<sub>1c</sub>% change: &#x2265;0.6%)</td><td align="left" valign="top">53.1 (47.2-58.9)</td></tr><tr><td align="left" valign="top">&#x2003;NPV (True HbA<sub>1c</sub>% change: &#x2265;0.6%)</td><td align="left" valign="top">74.4 (73.5-75.3)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>HbA<sub>1c</sub>: glycated hemoglobin.</p></fn><fn id="table4fn2"><p><sup>b</sup>PPV: positive predictive value (it is the probability that the cases predicted to experience clinically significant rise in HbA<sub>1c</sub> [at or above each percent rise threshold] did experience that outcome).</p></fn><fn id="table4fn3"><p><sup>c</sup>NPV: negative predictive value (it is the probability that the cases not predicted to experience clinically significant rise in HbA<sub>1c</sub> [at or above each percent rise threshold] did not experience that outcome).</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We used routinely collected EHR data, including both structured and unstructured data, to establish the feasibility of constructing an interpretable ML model for predicting unit-change in HbA<sub>1c</sub> (in % units) between quarterly diabetes clinic visits among youth (aged 9&#x2010;18 y) with T1D. For those predicted to experience a &#x2265;0.3% (approximately 3 mmol/mol) rise in HbA<sub>1c</sub> during the following 3 months, PPV was 60.3%, indicating a 1.5-fold enrichment (relative to the observed frequency [40.7%] of this outcome) for identifying youth who would experience a clinically significant rise in HbA<sub>1c</sub>. 
This finding, which suggests that EHR data may be useful for identifying youth who will experience rising glycemic levels, is clinically relevant given that a long-term increase of &#x2265;0.3% (3 mmol/mol) in HbA<sub>1c</sub> is associated with increased risk for long-term complications of diabetes [<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>Another key finding was that our model&#x2019;s sensitivity and PPV were higher when the predicted percent rise threshold was lower (eg, &#x2265;0.3% vs &#x2265;0.4%), whereas specificity and NPV were increased at higher predicted percent rise thresholds (eg, &#x2265;0.4% vs &#x2265;0.3%). We hypothesized that using a higher percent rise threshold would decrease the likelihood of false positives (ie, identifying a youth as someone who would experience a corresponding rise in HbA<sub>1c</sub> when they did not), and the data supported this conclusion. On the other hand, using a lower percent rise threshold reduced the likelihood of missing those who would experience a clinically significant rise in HbA<sub>1c</sub>. If confirmed in future studies, these findings suggest that using the lowest clinically significant threshold may be useful for guiding clinical decision-making and subsequent initiation of interventions designed to mitigate rising glycemic levels.</p><p>We also evaluated our model&#x2019;s ability to augment clinical decision-making by using PPV and NPV to identify a percent-change cut point that optimized identification of youth who experienced a clinically significant rise in HbA<sub>1c</sub> at their subsequent diabetes clinic encounter. Although PPV and NPV are considered the metrics of choice for clinical decision-making at the level of an individual person, the selection of desirable PPV and NPV values in a particular use case depends on numerous factors. 
These factors include considerations about short- and long-term burdens and costs related to over- or undertreatment, associated psychological impacts on individuals receiving care, and short- and long-term costs imposed on the health care system (eg, for increased staffing resources) [<xref ref-type="bibr" rid="ref43">43</xref>]. Therefore, before implementing this model clinically, it would be important to allow clinicians to provide feedback about the most appropriate thresholds for defining clinically significant rise in HbA<sub>1c</sub>, along with associated PPV and NPV values. For this work, we propose using the &#x2265;0.3% cut-point to maximize capture of high-risk youth who are candidates for behavioral and care delivery interventions designed to reduce or prevent predicted rise in HbA<sub>1c</sub>.</p><p>The top features impacting our model&#x2019;s predictions (ie, postal code, numerous metrics pertaining to HbA<sub>1c</sub>, and history of low treatment engagement) have been shown in previous studies to be associated with elevated glycemic levels. Ample evidence suggests associations between geographic location and geographically linked measures of socioeconomic status (eg, area deprivation, social deprivation, and child opportunity indices) and T1D outcomes, including glycemic levels and diabetic ketoacidosis [<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]. Previous HbA<sub>1c</sub> measurements have also been shown to significantly impact ML-based predictions of future HbA<sub>1c</sub>, but previous investigations have only examined this in adults with type 2 diabetes (T2D) [<xref ref-type="bibr" rid="ref48">48</xref>]. Finally, lower treatment engagement has been shown to have a substantial impact on HbA<sub>1c</sub> in youth with T1D [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]. 
This evidence collectively underscores the critical need for members of the diabetes care team to partner with affected youth and families to identify resources and tailored strategies for optimizing diabetes self-management behaviors.</p><p>Given the widespread use of EHRs in clinical care, as well as the growing volume and availability of these data, there exists tremendous potential for using EHR data to identify and personalize care pathways for improving health outcomes in T1D. Previous work has applied ML to EHR data, for example, to predict the onset of T1D in youth [<xref ref-type="bibr" rid="ref51">51</xref>], as well as diabetic ketoacidosis in both youth and adults with T1D [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. Recent research has focused on applying numerous ML classifiers to medical encounter data to predict HbA<sub>1c</sub> in individuals with T2D [<xref ref-type="bibr" rid="ref48">48</xref>]. The area under the receiver operating characteristic curve for each of the top 5 best-performing classifiers in the aforementioned study was extremely high (&#x003E;0.95). Of note, however, these model predictions were binary (ie, HbA<sub>1c</sub> &#x003C;7% [&#x003C;53 mmol/mol] vs &#x2265;7% [&#x2265;53 mmol/mol]) rather than continuous and were evaluated in a primarily adult Chinese cohort diagnosed with T2D, limiting generalizability to other populations. Our approach is designed to predict unit change in HbA<sub>1c</sub> and to give clinicians a simple output (ie, HbA<sub>1c</sub> will or will not increase by &#x2265;0.3%) for interpretation. This study is the first to use EHR data to predict a clinically significant rise in HbA<sub>1c</sub> in youth with T1D.</p><p>Recent efforts have also explored the use of ML classifiers that use 2 weeks of CGM data to forecast 90-day HbA<sub>1c</sub> in youth with T1D [<xref ref-type="bibr" rid="ref53">53</xref>]. 
The first of these studies used a nested, ensemble learning approach to iteratively predict HbA<sub>1c</sub> in stages: (1) HbA<sub>1c</sub> &#x2264;7.5% (58 mmol/mol) or &#x003E;7.5% (stage 1), (2) HbA<sub>1c</sub> &#x2264;9% (75 mmol/mol) or &#x003E;9% (stage 2, after stage 1 was complete), and (3) HbA<sub>1c</sub> &#x2264;12.5% (113 mmol/mol) or &#x003E;12.5% (stage 3, after stage 2 was complete) [<xref ref-type="bibr" rid="ref54">54</xref>]. A subsequent study used few-shot learning followed by K-nearest neighbors to classify transformed images of CGM time series data into multiclass HbA<sub>1c</sub> intervals [<xref ref-type="bibr" rid="ref55">55</xref>]. Generalizability of these HbA<sub>1c</sub> prediction efforts is limited, however, by these methods&#x2019; dependence on CGM data and by racial disparities in the relationship between CGM metrics and HbA<sub>1c</sub> [<xref ref-type="bibr" rid="ref56">56</xref>].</p><p>Currently, CGM systems are neither accessible to nor used by all individuals with T1D. Recent data from the T1D Exchange Quality Improvement Collaborative suggest that only 40%&#x2010;50% of US youth with T1D currently use CGM systems [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. Reasons for this are multifactorial and can include reluctance to use CGM technologies, financial constraints, lack of insurance coverage, device-related skin complications, CGM alarm fatigue, and sociodemographic and racial or ethnic disparities in access that adversely impact use of diabetes technologies [<xref ref-type="bibr" rid="ref58">58</xref>-<xref ref-type="bibr" rid="ref62">62</xref>]. At this time, CGM data also remain notably absent from most EHRs, are distributed across multiple proprietary commercial software, and are difficult for health systems to access. 
Although efforts to integrate CGM data into the EHR remain ongoing [<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref64">64</xref>], large-scale implementation of these efforts will hinge on the development of CGM-related data standards and a data architecture that supports this integration [<xref ref-type="bibr" rid="ref65">65</xref>].</p><p>In contrast, EHR data are routinely collected on every person receiving care from a given health care institution. These data thus provide a rich, longitudinal source of individual- and population-level health data that can be leveraged in near real-time for ML-driven clinical decision support [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref66">66</xref>]. Even so, the potential for integrating EHR-based ML-driven analytics in health care remains largely unrealized. A 2020 systematic review evaluating the number of clinical prediction models that have been embedded into EHRs noted that only 45 such examples have been published [<xref ref-type="bibr" rid="ref67">67</xref>]. Of note, only 36% (16/45) of model implementations occurred in outpatient settings, and none of the embedded models were specific to individuals affected by diabetes [<xref ref-type="bibr" rid="ref67">67</xref>]. These findings highlight a critical gap, as well as opportunity, for leveraging real-world EHR data to facilitate real-time risk prediction and improve diabetes-related health outcomes.</p></sec><sec id="s4-2"><title>Limitations and Strengths</title><p>A strength of this study is its use of longitudinal EHR data to predict 90-day unit-change in HbA<sub>1c</sub> in a large cohort of youth with T1D. The scale and granularity of these data facilitated the creation of thousands of data features that we simultaneously analyzed as potential predictors for suboptimal glycemic outcomes. 
Additional strengths of this study include its use of explainable ML methods for evaluating model predictions and our use of a clinician-led, postmodeling decision analysis to enhance clinicians&#x2019; understanding and uptake of model predictions. The relevance of our model is underscored by its ability to forecast 90-day change in HbA<sub>1c</sub> for all youth receiving care through our regional clinic network, and not only for those using CGM systems.</p><p>Several limitations also warrant consideration. The data used in this study originated from a regional network of diabetes clinics in the Midwest United States and may not generalize to other geographic locations or health care settings, to future cohorts using rapidly evolving diabetes treatment technologies, or to more racially or socioeconomically diverse cohorts. External validation of the geographic and demographic &#x201C;transportability&#x201D; of this and future iterations of our model will hinge on ensuring that data from different clinical settings are collected in similar ways and standardized according to a common data model. Examples of such data standards include the Observational Medical Outcomes Partnership Common Data Model [<xref ref-type="bibr" rid="ref68">68</xref>] and the T1D Exchange Quality Improvement Collaborative data specification [<xref ref-type="bibr" rid="ref69">69</xref>]. As well, EHR data are subject to data entry errors and missing data that inadvertently occur as a part of routine clinical care. EHRs are also characterized by data fragmentation and reflect biases in clinical data collection, documentation, and decision-making [<xref ref-type="bibr" rid="ref13">13</xref>]. 
Therefore, results from this and all models constructed using EHR data must be interpreted carefully, given both known and unknown biases that impact model predictions.</p><p>Model generalizability could be enhanced by using standardized geographic-based features (eg, an area deprivation index or the Child Opportunity Index [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]) rather than zip code, as well as by creating a final prediction model that includes only a limited number of the &#x201C;top-N&#x201D; features identified via cross-validation. Using additional data preprocessing methods (eg, one-hot encoding) when transforming categorical demographic features (eg, race and ethnicity) for ML would facilitate interpretability of model results pertaining to those features. Model performance may improve with additional hyperparameter tuning. This model&#x2019;s predictive utility could also be compared with that of models constructed using other ML methods, including other explainable AI methods and deep learning models. Finally, for youth who adopt diabetes technologies, such as CGM and automated insulin delivery systems, the inclusion of diabetes device data would likely significantly augment our model&#x2019;s predictions.</p><p>We acknowledge that translation of this work into clinical practice will be accompanied by various logistical and practical challenges. This study was designed as an &#x201C;initial step&#x201D; to evaluate the feasibility of using EHR data to predict change in HbA<sub>1c</sub>. As previously described, additional research is needed to address issues related to model refinement, validation (using data from external organizations, as well as future EHR data collected from our network of diabetes centers), and deployment in clinical settings. 
Future work can, for example, evaluate whether a limited set of standardized features may be useful for developing a more parsimonious model that can be readily disseminated to other institutions. Once deployed, ongoing monitoring of model performance will also be needed.</p><p>Furthermore, we acknowledge that refining and successfully incorporating this approach into clinical and decision workflows will hinge on the collection of additional evidence from future studies with even larger and more diverse patient cohorts, as well as buy-in and trust from both clinicians and patients. Although, in this iteration, our modeling approach yielded a nontrivial number of false positives, we note as well that our model&#x2019;s performance represents a substantial improvement over existing capabilities. Compared, for example, with initiating interventions randomly or initiating interventions at every diabetes clinic visit (to address youths&#x2019; rising glycemic levels, which occurred 40.7% of the time in our cohort), our modeling efforts facilitated pre-emptive identification of rising glucose levels three-fifths of the time. The 1.5-fold risk enrichment demonstrated in this work represents a meaningfully improved opportunity for more targeted initiation and delivery of interventions designed to lower youths&#x2019; glucose levels.</p></sec><sec id="s4-3"><title>Conclusions</title><p>Using EHR data to develop an ML-based prediction model to identify youth who will experience a clinically significant rise in HbA<sub>1c</sub> between diabetes clinic visits is both timely and feasible. Future research should aim to further optimize model performance, as well as evaluate model performance in racially or ethnically, socioeconomically, and geographically diverse cohorts. Future work is also needed to evaluate whether model results vary by duration of diabetes, use of technology (eg, CGM system users vs nonusers), and insulin delivery modality. 
Findings from this study may help to inform risk stratification and resource allocation efforts and serve as a catalyst for future quality improvement efforts focused on developing and evaluating personalized strategies and supports for optimizing diabetes self-management behaviors.</p></sec></sec></body><back><ack><p>The authors would like to thank Brian &#x201C;Mooose&#x201D; Rivera and Avinash Kollu for providing software engineering support to create the data pipeline necessary to complete this analysis, Adin Shniffer for project management, and Casey McClain and Emily DeWit for team and project management.</p><p>This study was funded by the Leona M. and Harry B. Helmsley Charitable Trust (grants G-2017PG-T1D019 and 2008&#x2010;04043). DF is supported by research funds from the Italian Ministry of Health. ARK is supported by the National Institute on Aging, National Institutes of Health (grant K01-AG084971). The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated and analyzed for this study are not publicly available due to sensitive information contained in patient medical records. Interested parties should contact the corresponding author to inquire about access. Source code for this study is available from the corresponding author upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>EMT and CS participated in interpreting data and drafting the manuscript. DDW, CM, and MAC participated in study conceptualization and design, analysis, interpreting data, and drafting or revising the manuscript. BL, DF, CAV, MSB, ACS, ARK, SRP, SM, RM, ML, and LD participated in editing and revising the manuscript. All authors approved this manuscript for submission.</p></fn><fn fn-type="conflict"><p>CM and LD are employees of Blue Circle Health. RM is a consultant for Sanofi. 
ML has received research grants from Eli Lilly and Novo Nordisk and has been a consultant or has received honoraria from AstraZeneca, Boehringer Ingelheim, Eli Lilly, Nordicinfu Care, Novo Nordisk, and Rubin Medical, all outside the submitted work. MAC is a consultant for Glooko, Inc. and receives research support from Dexcom and Abbott Diabetes Care. All other authors are responsible for the reported research and stated that they have no affiliation, financial agreement, or involvement with any company or other organization with a financial interest in the subject matter of the submitted manuscript.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CCSR</term><def><p>Clinical Classifications Software Refined</p></def></def-item><def-item><term id="abb2">CGM</term><def><p>continuous glucose monitoring</p></def></def-item><def-item><term id="abb3">CPT</term><def><p>Current Procedural Terminology</p></def></def-item><def-item><term id="abb4">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb5">HbA<sub>1c</sub></term><def><p>glycated hemoglobin</p></def></def-item><def-item><term id="abb6">ICD-10</term><def><p><italic>International Statistical Classification of Diseases, Tenth Revision</italic></p></def></def-item><def-item><term id="abb7">ICD-9</term><def><p><italic>International Classification of Diseases, Ninth Revision</italic></p></def></def-item><def-item><term id="abb8">MAE</term><def><p>mean absolute error</p></def></def-item><def-item><term id="abb9">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb10">NPV</term><def><p>negative predictive value</p></def></def-item><def-item><term id="abb11">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb12">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb13">RMSE</term><def><p>root-mean-square error</p></def></def-item><def-item><term id="abb14">SNOMED 
CT</term><def><p>Systematized Nomenclature of Medicine Clinical Terms</p></def></def-item><def-item><term id="abb15">T1D</term><def><p>type 1 diabetes</p></def></def-item><def-item><term id="abb16">T2D</term><def><p>type 2 diabetes</p></def></def-item><def-item><term id="abb17">TF-IDF</term><def><p>term frequency-inverse document frequency</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>DiMeglio</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Evans-Molina</surname><given-names>C</given-names> </name><name name-style="western"><surname>Oram</surname><given-names>RA</given-names> </name></person-group><article-title>Type 1 diabetes</article-title><source>Lancet</source><year>2018</year><month>06</month><day>16</day><volume>391</volume><issue>10138</issue><fpage>2449</fpage><lpage>2462</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(18)31320-5</pub-id><pub-id pub-id-type="medline">29916386</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Selvin</surname><given-names>E</given-names> </name></person-group><article-title>Prevalence of type 1 diabetes among US children and adults by age, sex, race, and ethnicity</article-title><source>JAMA</source><year>2024</year><month>04</month><day>23</day><volume>331</volume><issue>16</issue><fpage>1411</fpage><lpage>1413</lpage><pub-id pub-id-type="doi">10.1001/jama.2024.2103</pub-id><pub-id pub-id-type="medline">38573653</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><collab>American Diabetes Association Professional Practice Committee</collab></person-group><article-title>6. Glycemic goals and hypoglycemia: standards of care in diabetes&#x2014;2024</article-title><source>Diabetes Care</source><year>2024</year><month>01</month><day>1</day><volume>47</volume><issue>Supplement_1</issue><fpage>S111</fpage><lpage>S125</lpage><pub-id pub-id-type="doi">10.2337/dc24-S006</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>American Diabetes Association Professional Practice Committee</collab></person-group><article-title>14. Children and adolescents: standards of care in diabetes&#x2014;2024</article-title><source>Diabetes Care</source><year>2024</year><month>01</month><day>1</day><volume>47</volume><issue>Supplement_1</issue><fpage>S258</fpage><lpage>S281</lpage><pub-id pub-id-type="doi">10.2337/dc24-S014</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pati&#x00F1;o-Fern&#x00E1;ndez</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Eidson</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sanchez</surname><given-names>J</given-names> </name><name name-style="western"><surname>Delamater</surname><given-names>AM</given-names> </name></person-group><article-title>What do youth with type 1 diabetes know about the HbA1c test?</article-title><source>Child Health Care</source><year>2010</year><month>04</month><day>1</day><volume>38</volume><issue>2</issue><fpage>157</fpage><lpage>167</lpage><pub-id pub-id-type="doi">10.1080/02739610902813328</pub-id><pub-id pub-id-type="medline">20563233</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Foster</surname><given-names>NC</given-names> </name><name name-style="western"><surname>Beck</surname><given-names>RW</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>State of type 1 diabetes management and outcomes from the T1D Exchange in 2016-2018</article-title><source>Diabetes Technol Ther</source><year>2019</year><month>02</month><volume>21</volume><issue>2</issue><fpage>66</fpage><lpage>72</lpage><pub-id pub-id-type="doi">10.1089/dia.2018.0384</pub-id><pub-id pub-id-type="medline">30657336</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>American Diabetes Association Professional Practice Committee</collab></person-group><article-title>7. Diabetes technology: standards of care in diabetes-2024</article-title><source>Diabetes Care</source><year>2024</year><month>01</month><day>1</day><volume>47</volume><issue>Suppl 1</issue><fpage>S126</fpage><lpage>S144</lpage><pub-id pub-id-type="doi">10.2337/dc24-S007</pub-id><pub-id pub-id-type="medline">38078575</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Redondo</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Libman</surname><given-names>I</given-names> </name><name name-style="western"><surname>Maahs</surname><given-names>DM</given-names> </name><etal/></person-group><article-title>The evolution of hemoglobin A<sub>1c</sub> targets for youth with type 1 diabetes: rationale and supporting evidence</article-title><source>Diabetes Care</source><year>2021</year><month>02</month><volume>44</volume><issue>2</issue><fpage>301</fpage><lpage>312</lpage><pub-id pub-id-type="doi">10.2337/dc20-1978</pub-id><pub-id 
pub-id-type="medline">33431422</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hermann</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Hofer</surname><given-names>SE</given-names> </name><etal/></person-group><article-title>The Transatlantic HbA<sub>1c</sub> gap: differences in glycaemic control across the lifespan between people included in the US T1D Exchange Registry and those included in the German/Austrian DPV registry</article-title><source>Diabet Med</source><year>2020</year><month>05</month><volume>37</volume><issue>5</issue><fpage>848</fpage><lpage>855</lpage><pub-id pub-id-type="doi">10.1111/dme.14148</pub-id><pub-id pub-id-type="medline">31557351</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clements</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Schwandt</surname><given-names>A</given-names> </name><name name-style="western"><surname>Donaghue</surname><given-names>KC</given-names> </name><etal/></person-group><article-title>Five heterogeneous HbA1c trajectories from childhood to adulthood in youth with type 1 diabetes from three different continents: a group-based modeling approach</article-title><source>Pediatr Diabetes</source><year>2019</year><month>11</month><volume>20</volume><issue>7</issue><fpage>920</fpage><lpage>931</lpage><pub-id pub-id-type="doi">10.1111/pedi.12907</pub-id><pub-id pub-id-type="medline">31418521</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lachin</surname><given-names>JM</given-names> 
</name><name name-style="western"><surname>Nathan</surname><given-names>DM</given-names> </name><collab>DCCT/EDIC Research Group</collab></person-group><article-title>Understanding metabolic memory: the prolonged influence of glycemia during the Diabetes Control and Complications Trial (DCCT) on future risks of complications during the study of the Epidemiology of Diabetes Interventions and Complications (EDIC)</article-title><source>Diabetes Care</source><year>2021</year><month>09</month><day>21</day><volume>44</volume><issue>10</issue><fpage>2216</fpage><lpage>2224</lpage><pub-id pub-id-type="doi">10.2337/dc20-3097</pub-id><pub-id pub-id-type="medline">34548284</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>Writing Team for the Diabetes Control and Complications Trial/Epidemiology of Diabetes Interventions and Complications Research Group</collab></person-group><article-title>Sustained effect of intensive treatment of type 1 diabetes mellitus on development and progression of diabetic nephropathy: the Epidemiology of Diabetes Interventions and Complications (EDIC) study</article-title><source>JAMA</source><year>2003</year><month>10</month><day>22</day><volume>290</volume><issue>16</issue><fpage>2159</fpage><lpage>2167</lpage><pub-id pub-id-type="doi">10.1001/jama.290.16.2159</pub-id><pub-id pub-id-type="medline">14570951</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Woldemariam</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Miramontes</surname><given-names>S</given-names> </name><name name-style="western"><surname>Norgeot</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Oskotsky</surname><given-names>TT</given-names> </name><name name-style="western"><surname>Sirota</surname><given-names>M</given-names> </name></person-group><article-title>Harnessing EHR data for health research</article-title><source>Nat Med</source><year>2024</year><month>07</month><volume>30</volume><issue>7</issue><fpage>1847</fpage><lpage>1855</lpage><pub-id pub-id-type="doi">10.1038/s41591-024-03074-8</pub-id><pub-id pub-id-type="medline">38965433</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sauer</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Hyland</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Girbes</surname><given-names>A</given-names> </name><name name-style="western"><surname>Elbers</surname><given-names>P</given-names> </name><name name-style="western"><surname>Celi</surname><given-names>LA</given-names> </name></person-group><article-title>Leveraging electronic health records for data science: common pitfalls and how to avoid them</article-title><source>Lancet Digit Health</source><year>2022</year><month>12</month><volume>4</volume><issue>12</issue><fpage>e893</fpage><lpage>e898</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(22)00154-6</pub-id><pub-id pub-id-type="medline">36154811</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goldstein</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Navar</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Pencina</surname><given-names>MJ</given-names> </name><name 
name-style="western"><surname>Ioannidis</surname><given-names>JPA</given-names> </name></person-group><article-title>Opportunities and challenges in developing risk prediction models with electronic health records data: a systematic review</article-title><source>J Am Med Inform Assoc</source><year>2017</year><month>01</month><volume>24</volume><issue>1</issue><fpage>198</fpage><lpage>208</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocw042</pub-id><pub-id pub-id-type="medline">27189013</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zrubka</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Kert&#x00E9;sz</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gul&#x00E1;csi</surname><given-names>L</given-names> </name><etal/></person-group><article-title>The reporting quality of machine learning studies on pediatric diabetes mellitus: systematic review</article-title><source>J Med Internet Res</source><year>2024</year><month>01</month><day>19</day><volume>26</volume><fpage>e47430</fpage><pub-id pub-id-type="doi">10.2196/47430</pub-id><pub-id pub-id-type="medline">38241075</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>K</given-names> </name><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Machine learning models for blood glucose level prediction in patients with diabetes mellitus: systematic review and network meta-analysis</article-title><source>JMIR Med Inform</source><year>2023</year><month>11</month><day>20</day><volume>11</volume><fpage>e47833</fpage><pub-id 
pub-id-type="doi">10.2196/47833</pub-id><pub-id pub-id-type="medline">37983072</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fregoso-Aparicio</surname><given-names>L</given-names> </name><name name-style="western"><surname>Noguez</surname><given-names>J</given-names> </name><name name-style="western"><surname>Montesinos</surname><given-names>L</given-names> </name><name name-style="western"><surname>Garc&#x00ED;a-Garc&#x00ED;a</surname><given-names>JA</given-names> </name></person-group><article-title>Machine learning and deep learning predictive models for type 2 diabetes: a systematic review</article-title><source>Diabetol Metab Syndr</source><year>2021</year><month>12</month><day>20</day><volume>13</volume><issue>1</issue><fpage>148</fpage><pub-id pub-id-type="doi">10.1186/s13098-021-00767-9</pub-id><pub-id pub-id-type="medline">34930452</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Williams</surname><given-names>DD</given-names> </name><name name-style="western"><surname>Ferro</surname><given-names>D</given-names> </name><name name-style="western"><surname>Mullaney</surname><given-names>C</given-names> </name><etal/></person-group><article-title>An &#x201C;All-Data-on-Hand&#x201D; deep learning model to predict hospitalization for diabetic ketoacidosis in youth with type 1 diabetes: development and validation study</article-title><source>JMIR Diabetes</source><year>2023</year><month>07</month><day>18</day><volume>8</volume><fpage>e47592</fpage><pub-id pub-id-type="doi">10.2196/47592</pub-id><pub-id pub-id-type="medline">37224506</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Subramanian</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sonabend</surname><given-names>R</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>I</given-names> </name></person-group><article-title>A machine learning model for risk stratification of postdiagnosis diabetic ketoacidosis hospitalization in pediatric type 1 diabetes: retrospective study</article-title><source>JMIR Diabetes</source><year>2024</year><month>08</month><day>7</day><volume>9</volume><fpage>e53338</fpage><pub-id pub-id-type="doi">10.2196/53338</pub-id><pub-id pub-id-type="medline">39110490</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tallon</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Ebekozien</surname><given-names>O</given-names> </name><name name-style="western"><surname>Sanchez</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Impact of diabetes status and related factors on COVID-19-associated hospitalization: a nationwide retrospective cohort study of 116,370 adults with SARS-CoV-2 infection</article-title><source>Diabetes Res Clin Pract</source><year>2022</year><month>12</month><volume>194</volume><fpage>110156</fpage><pub-id pub-id-type="doi">10.1016/j.diabres.2022.110156</pub-id><pub-id pub-id-type="medline">36400172</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><article-title>What is random forest?</article-title><source>IBM</source><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ibm.com/topics/random-forest">https://www.ibm.com/topics/random-forest</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Breiman</surname><given-names>L</given-names> </name></person-group><article-title>Random Forests</article-title><source>Mach Learn</source><year>2001</year><month>10</month><volume>45</volume><issue>1</issue><fpage>5</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>Oracle Health EHR</article-title><source>Oracle</source><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.oracle.com/health/clinical-suite/electronic-health-record/">https://www.oracle.com/health/clinical-suite/electronic-health-record/</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Prahalad</surname><given-names>P</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Scheinker</surname><given-names>D</given-names> </name><name name-style="western"><surname>Desai</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hood</surname><given-names>K</given-names> </name><name name-style="western"><surname>Maahs</surname><given-names>DM</given-names> </name></person-group><article-title>Hemoglobin A1c trajectory in pediatric patients with newly diagnosed type 1 diabetes</article-title><source>Diabetes Technol Ther</source><year>2019</year><month>08</month><volume>21</volume><issue>8</issue><fpage>456</fpage><lpage>461</lpage><pub-id pub-id-type="doi">10.1089/dia.2019.0065</pub-id><pub-id pub-id-type="medline">31180244</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Ibfelt</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Wibaek</surname><given-names>R</given-names> </name><name name-style="western"><surname>Vistisen</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Trajectory and predictors of HbA1c in children and adolescents with type 1 diabetes&#x2014;a Danish nationwide cohort study</article-title><source>Pediatr Diabetes</source><year>2022</year><month>09</month><volume>23</volume><issue>6</issue><fpage>721</fpage><lpage>728</lpage><pub-id pub-id-type="doi">10.1111/pedi.13337</pub-id><pub-id pub-id-type="medline">35366046</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miller</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Foster</surname><given-names>NC</given-names> </name><name name-style="western"><surname>Beck</surname><given-names>RW</given-names> </name><etal/></person-group><article-title>Current state of type 1 diabetes treatment in the U.S.: updated data from the T1D Exchange clinic registry</article-title><source>Diabetes Care</source><year>2015</year><month>06</month><volume>38</volume><issue>6</issue><fpage>971</fpage><lpage>978</lpage><pub-id pub-id-type="doi">10.2337/dc15-0078</pub-id><pub-id pub-id-type="medline">25998289</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lind</surname><given-names>M</given-names> </name><name name-style="western"><surname>Polonsky</surname><given-names>W</given-names> </name><name name-style="western"><surname>Hirsch</surname><given-names>IB</given-names> </name><etal/></person-group><article-title>Continuous glucose monitoring vs conventional therapy for glycemic control in adults with type 1 diabetes 
treated with multiple daily insulin injections: the GOLD randomized clinical trial</article-title><source>JAMA</source><year>2017</year><month>01</month><day>24</day><volume>317</volume><issue>4</issue><fpage>379</fpage><lpage>387</lpage><pub-id pub-id-type="doi">10.1001/jama.2016.19976</pub-id><pub-id pub-id-type="medline">28118454</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>Multum MediSource Lexicon (MMSL) Source Information</article-title><source>National Library of Medicine</source><year>2020</year><month>03</month><day>31</day><comment><ext-link ext-link-type="uri" xlink:href="https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html">https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Kavlakoglu</surname><given-names>E</given-names> </name><name name-style="western"><surname>Winland</surname><given-names>V</given-names> </name></person-group><article-title>What is k-means clustering?</article-title><source>IBM</source><year>2024</year><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ibm.com/topics/k-means-clustering">https://www.ibm.com/topics/k-means-clustering</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>StringIndexer</article-title><source>Apache Software Foundation</source><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.StringIndexer.html">https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.feature.StringIndexer.html</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation 
citation-type="web"><person-group person-group-type="author"><collab>Agency for Healthcare Research and Quality</collab></person-group><source>Healthcare Cost &#x0026; Utilization Project user support: Clinical Classifications Software Refined (CCSR) for ICD-10-CM diagnoses</source><year>2024</year><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://hcup-us.ahrq.gov/toolssoftware/ccsr/dxccsr.jsp">https://hcup-us.ahrq.gov/toolssoftware/ccsr/dxccsr.jsp</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Manning</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Raghavan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sch&#x00FC;tze</surname><given-names>H</given-names> </name></person-group><source>An Introduction to Information Retrieval</source><year>2008</year><access-date>2025-09-19</access-date><publisher-name>Cambridge, England: Cambridge University Press</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://nlp.stanford.edu/IR-book/pdf/irbookonlinereading.pdf">https://nlp.stanford.edu/IR-book/pdf/irbookonlinereading.pdf</ext-link></comment><pub-id pub-id-type="other">0521865719</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><article-title>What is bagging?</article-title><source>IBM</source><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ibm.com/topics/bagging">https://www.ibm.com/topics/bagging</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liaw</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Wiener</surname><given-names>M</given-names> </name></person-group><article-title>Classification and regression by randomForest</article-title><source>R News</source><year>2002</year><access-date>2025-09-19</access-date><fpage>18</fpage><lpage>22</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://journal.r-project.org/articles/RN-2002-022/RN-2002-022.pdf">https://journal.r-project.org/articles/RN-2002-022/RN-2002-022.pdf</ext-link></comment></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Breiman</surname><given-names>L</given-names> </name></person-group><article-title>Bagging predictors</article-title><source>Mach Learn</source><year>1996</year><month>08</month><volume>24</volume><issue>2</issue><fpage>123</fpage><lpage>140</lpage><pub-id pub-id-type="doi">10.1007/BF00058655</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jackson</surname><given-names>EK</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>W</given-names> </name><name name-style="western"><surname>Nelsen</surname><given-names>B</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>GP</given-names> </name><name name-style="western"><surname>Nelson</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Ames</surname><given-names>DP</given-names> </name></person-group><article-title>Introductory overview: error metrics for hydrologic modelling &#x2013; a review of common practices and an open source library to facilitate use and adoption</article-title><source>Environ Model Softw</source><year>2019</year><month>09</month><volume>119</volume><fpage>32</fpage><lpage>48</lpage><pub-id 
pub-id-type="doi">10.1016/j.envsoft.2019.05.001</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="web"><article-title>RandomForestRegressor</article-title><source>Apache Software Foundation</source><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://spark.apache.org/docs/3.5.2/api/scala/org/apache/spark/ml/regression/RandomForestRegressor.html">https://spark.apache.org/docs/3.5.2/api/scala/org/apache/spark/ml/regression/RandomForestRegressor.html</ext-link></comment></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ishwaran</surname><given-names>H</given-names> </name></person-group><article-title>The effect of splitting on random forests</article-title><source>Mach Learn</source><year>2015</year><month>04</month><volume>99</volume><issue>1</issue><fpage>75</fpage><lpage>118</lpage><pub-id pub-id-type="doi">10.1007/s10994-014-5451-2</pub-id><pub-id pub-id-type="medline">28919667</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nembrini</surname><given-names>S</given-names> </name><name name-style="western"><surname>K&#x00F6;nig</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Wright</surname><given-names>MN</given-names> </name></person-group><article-title>The revival of the Gini importance?</article-title><source>Bioinformatics</source><year>2018</year><month>11</month><day>1</day><volume>34</volume><issue>21</issue><fpage>3711</fpage><lpage>3718</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/bty373</pub-id><pub-id pub-id-type="medline">29757357</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="web"><article-title>Machine Learning Library (MLlib) 
Guide</article-title><source>Apache Software Foundation</source><access-date>2024-12-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://spark.apache.org/docs/latest/ml-guide">https://spark.apache.org/docs/latest/ml-guide</ext-link></comment></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>StataCorp</collab></person-group><source>Stata Statistical Software</source><year>2023</year><edition>18</edition><publisher-name>College Station, Texas: StataCorp LLC</publisher-name></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trevethan</surname><given-names>R</given-names> </name></person-group><article-title>Sensitivity, specificity, and predictive values: foundations, pliabilities, and pitfalls in research and practice</article-title><source>Front Public Health</source><year>2017</year><volume>5</volume><fpage>307</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2017.00307</pub-id><pub-id pub-id-type="medline">29209603</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holm</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Hejlesen</surname><given-names>OK</given-names> </name><name name-style="western"><surname>Hagstr&#x00F8;m</surname><given-names>S</given-names> </name><name name-style="western"><surname>Madsen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hangaard</surname><given-names>S</given-names> </name></person-group><article-title>Prediction of poor glycemic control in children with type 1 diabetes</article-title><source>Stud Health Technol 
Inform</source><year>2024</year><month>08</month><day>22</day><volume>316</volume><fpage>1759</fpage><lpage>1760</lpage><pub-id pub-id-type="doi">10.3233/SHTI240770</pub-id><pub-id pub-id-type="medline">39176556</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Everett</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mathioudakis</surname><given-names>N</given-names> </name></person-group><article-title>Association of area deprivation and diabetic ketoacidosis readmissions: comparative risk analysis of adults vs children with type 1 diabetes</article-title><source>J Clin Endocrinol Metab</source><year>2019</year><month>08</month><day>1</day><volume>104</volume><issue>8</issue><fpage>3473</fpage><lpage>3480</lpage><pub-id pub-id-type="doi">10.1210/jc.2018-02232</pub-id><pub-id pub-id-type="medline">31220288</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Carter</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Cutfield</surname><given-names>WS</given-names> </name><name name-style="western"><surname>Hofman</surname><given-names>PL</given-names> </name><etal/></person-group><article-title>Ethnicity and social deprivation independently influence metabolic control in children with type 1 diabetes</article-title><source>Diabetologia</source><year>2008</year><month>10</month><volume>51</volume><issue>10</issue><fpage>1835</fpage><lpage>1842</lpage><pub-id pub-id-type="doi">10.1007/s00125-008-1106-9</pub-id><pub-id pub-id-type="medline">18679654</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Hoyek</surname><given-names>K</given-names> </name><name name-style="western"><surname>Libman</surname><given-names>I</given-names> </name><name name-style="western"><surname>Mkparu</surname><given-names>N</given-names> </name><name name-style="western"><surname>Hong</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Arslanian</surname><given-names>S</given-names> </name><name name-style="western"><surname>Vajravelu</surname><given-names>ME</given-names> </name></person-group><article-title>Child Opportunity Index and clinical characteristics at diabetes diagnosis in youth: type 1 diabetes versus type 2 diabetes</article-title><source>BMJ Open Diabetes Res Care</source><year>2024</year><month>04</month><day>17</day><volume>12</volume><issue>2</issue><fpage>e003968</fpage><pub-id pub-id-type="doi">10.1136/bmjdrc-2023-003968</pub-id><pub-id pub-id-type="medline">38631820</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Predicting three-month fasting blood glucose and glycated hemoglobin changes in patients with type 2 diabetes mellitus based on multiple machine learning algorithms</article-title><source>Sci Rep</source><year>2023</year><month>09</month><day>30</day><volume>13</volume><issue>1</issue><fpage>16437</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-43240-5</pub-id><pub-id pub-id-type="medline">37777593</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Bombaci</surname><given-names>B</given-names> </name><name name-style="western"><surname>Torre</surname><given-names>A</given-names> </name><name name-style="western"><surname>Longo</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Psychological and clinical challenges in the management of type 1 diabetes during adolescence: a narrative review</article-title><source>Children (Basel)</source><year>2024</year><month>09</month><day>4</day><volume>11</volume><issue>9</issue><fpage>1085</fpage><pub-id pub-id-type="doi">10.3390/children11091085</pub-id><pub-id pub-id-type="medline">39334618</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Rusnak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Garrity</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Feasibility of electronic health record assessment of 6 pediatric type 1 diabetes self-management habits and their association with glycemic outcomes</article-title><source>JAMA Netw Open</source><year>2021</year><month>10</month><day>1</day><volume>4</volume><issue>10</issue><fpage>e2131278</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.31278</pub-id><pub-id pub-id-type="medline">34709387</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Daniel</surname><given-names>R</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gregory</surname><given-names>JW</given-names> </name><etal/></person-group><article-title>Predicting type 1 diabetes in children using 
electronic health records in primary care in the UK: development and validation of a machine-learning algorithm</article-title><source>Lancet Digit Health</source><year>2024</year><month>06</month><volume>6</volume><issue>6</issue><fpage>e386</fpage><lpage>e395</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(24)00050-5</pub-id><pub-id pub-id-type="medline">38789139</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>FL</given-names> </name><etal/></person-group><article-title>Performance assessment of different machine learning approaches in predicting diabetic ketoacidosis in adults with type 1 diabetes using electronic health records data</article-title><source>Pharmacoepidemiol Drug Saf</source><year>2021</year><month>05</month><volume>30</volume><issue>5</issue><fpage>610</fpage><lpage>618</lpage><pub-id pub-id-type="doi">10.1002/pds.5199</pub-id><pub-id pub-id-type="medline">33480091</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bergenstal</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Beck</surname><given-names>RW</given-names> </name><name name-style="western"><surname>Close</surname><given-names>KL</given-names> </name><etal/></person-group><article-title>Glucose Management Indicator (GMI): a new term for estimating A1C from continuous glucose monitoring</article-title><source>Diabetes Care</source><year>2018</year><month>11</month><volume>41</volume><issue>11</issue><fpage>2275</fpage><lpage>2280</lpage><pub-id pub-id-type="doi">10.2337/dc18-1581</pub-id><pub-id 
pub-id-type="medline">30224348</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Islam</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Qaraqe</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Belhaouari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Petrovski</surname><given-names>G</given-names> </name></person-group><article-title>Long term HbA1c prediction using multi-stage CGM data analysis</article-title><source>IEEE Sensors J</source><year>2021</year><volume>21</volume><issue>13</issue><fpage>15237</fpage><lpage>15247</lpage><pub-id pub-id-type="doi">10.1109/JSEN.2021.3073974</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qaraqe</surname><given-names>M</given-names> </name><name name-style="western"><surname>Elzein</surname><given-names>A</given-names> </name><name name-style="western"><surname>Belhaouari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ilam</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Petrovski</surname><given-names>G</given-names> </name></person-group><article-title>A novel few shot learning derived architecture for long-term HbA1c prediction</article-title><source>Sci Rep</source><year>2024</year><month>01</month><day>4</day><volume>14</volume><issue>1</issue><fpage>482</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-50348-1</pub-id><pub-id pub-id-type="medline">38177624</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bergenstal</surname><given-names>RM</given-names> </name><name 
name-style="western"><surname>Gal</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Connor</surname><given-names>CG</given-names> </name><etal/></person-group><article-title>Racial differences in the relationship of glucose concentrations and hemoglobin A1c levels</article-title><source>Ann Intern Med</source><year>2017</year><month>07</month><day>18</day><volume>167</volume><issue>2</issue><fpage>95</fpage><lpage>102</lpage><pub-id pub-id-type="doi">10.7326/M16-2596</pub-id><pub-id pub-id-type="medline">28605777</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>DeSalvo</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Lanzinger</surname><given-names>S</given-names> </name><name name-style="western"><surname>Noor</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Transatlantic comparison of pediatric continuous glucose monitoring use in the Diabetes-Patienten-Verlaufsdokumentation Initiative and Type 1 Diabetes Exchange Quality Improvement Collaborative</article-title><source>Diabetes Technol Ther</source><year>2022</year><month>12</month><volume>24</volume><issue>12</issue><fpage>920</fpage><lpage>924</lpage><pub-id pub-id-type="doi">10.1089/dia.2022.0248</pub-id><pub-id pub-id-type="medline">35947079</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>DeSalvo</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Noor</surname><given-names>N</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Patient demographics and clinical outcomes among type 1 diabetes patients using continuous glucose monitors: data 
from T1D Exchange real-world observational study</article-title><source>J Diabetes Sci Technol</source><year>2023</year><month>03</month><volume>17</volume><issue>2</issue><fpage>322</fpage><lpage>328</lpage><pub-id pub-id-type="doi">10.1177/19322968211049783</pub-id><pub-id pub-id-type="medline">34632823</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ebekozien</surname><given-names>O</given-names> </name><name name-style="western"><surname>Mungmode</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sanchez</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Longitudinal trends in glycemic outcomes and technology use for over 48,000 people with type 1 diabetes (2016&#x2013;2022) from the T1D Exchange Quality Improvement Collaborative</article-title><source>Diabetes Technol Ther</source><year>2023</year><month>11</month><volume>25</volume><issue>11</issue><fpage>765</fpage><lpage>773</lpage><pub-id pub-id-type="doi">10.1089/dia.2023.0320</pub-id><pub-id pub-id-type="medline">37768677</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rigo</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Levin</surname><given-names>LE</given-names> </name><name name-style="western"><surname>Belsito</surname><given-names>DV</given-names> </name><name name-style="western"><surname>Garzon</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Gandica</surname><given-names>R</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>KM</given-names> </name></person-group><article-title>Cutaneous reactions to continuous glucose monitoring and continuous subcutaneous insulin infusion devices in type 
1 diabetes mellitus</article-title><source>J Diabetes Sci Technol</source><year>2021</year><month>07</month><volume>15</volume><issue>4</issue><fpage>786</fpage><lpage>791</lpage><pub-id pub-id-type="doi">10.1177/1932296820918894</pub-id><pub-id pub-id-type="medline">32389062</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tilden</surname><given-names>DR</given-names> </name><name name-style="western"><surname>French</surname><given-names>B</given-names> </name><name name-style="western"><surname>Datye</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Jaser</surname><given-names>SS</given-names> </name></person-group><article-title>Disparities in continuous glucose monitor use between children with type 1 diabetes living in urban and rural areas</article-title><source>Diabetes Care</source><year>2024</year><month>03</month><day>1</day><volume>47</volume><issue>3</issue><fpage>346</fpage><lpage>352</lpage><pub-id pub-id-type="doi">10.2337/dc23-1564</pub-id><pub-id pub-id-type="medline">37906202</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barnard-Kelly</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Mart&#x00ED;nez-Brocca</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Glatzer</surname><given-names>T</given-names> </name><name name-style="western"><surname>Oliver</surname><given-names>N</given-names> </name></person-group><article-title>Identifying the deficiencies of currently available CGM to improve uptake and benefit</article-title><source>Diabet Med</source><year>2024</year><month>08</month><volume>41</volume><issue>8</issue><fpage>e15338</fpage><pub-id pub-id-type="doi">10.1111/dme.15338</pub-id><pub-id 
pub-id-type="medline">38736324</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Espinoza</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>P</given-names> </name><name name-style="western"><surname>Raymond</surname><given-names>J</given-names> </name></person-group><article-title>Integrating continuous glucose monitor data directly into the electronic health record: proof of concept</article-title><source>Diabetes Technol Ther</source><year>2020</year><month>08</month><volume>22</volume><issue>8</issue><fpage>570</fpage><lpage>576</lpage><pub-id pub-id-type="doi">10.1089/dia.2019.0377</pub-id><pub-id pub-id-type="medline">31904260</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Okuno</surname><given-names>T</given-names> </name><name name-style="western"><surname>Macwan</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>D</given-names> </name><name name-style="western"><surname>Norman</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Reaven</surname><given-names>P</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>JJ</given-names> </name></person-group><article-title>Assessing patterns of continuous glucose monitoring use and metrics of glycemic control in type 1 diabetes and type 2 diabetes patients in the veterans health care system: integrating continuous glucose monitoring device data with electronic health records data</article-title><source>Diabetes Technol Ther</source><year>2024</year><month>11</month><volume>26</volume><issue>11</issue><fpage>806</fpage><lpage>813</lpage><pub-id 
pub-id-type="doi">10.1089/dia.2024.0083</pub-id><pub-id pub-id-type="medline">38768417</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Espinoza</surname><given-names>J</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>NY</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>KT</given-names> </name><name name-style="western"><surname>Klonoff</surname><given-names>DC</given-names> </name></person-group><article-title>The need for data standards and implementation policies to integrate CGM data into the electronic health record</article-title><source>J Diabetes Sci Technol</source><year>2023</year><month>03</month><volume>17</volume><issue>2</issue><fpage>495</fpage><lpage>502</lpage><pub-id pub-id-type="doi">10.1177/19322968211058148</pub-id><pub-id pub-id-type="medline">34802286</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kamel Rahimi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Canfell</surname><given-names>OJ</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Machine learning models for diabetes management in acute care using electronic medical records: a systematic review</article-title><source>Int J Med Inform</source><year>2022</year><month>06</month><volume>162</volume><fpage>104758</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2022.104758</pub-id><pub-id pub-id-type="medline">35398812</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>TC</given-names> 
</name><name name-style="western"><surname>Shah</surname><given-names>NU</given-names> </name><name name-style="western"><surname>Haack</surname><given-names>A</given-names> </name><name name-style="western"><surname>Baxter</surname><given-names>SL</given-names> </name></person-group><article-title>Clinical implementation of predictive models embedded within electronic health record systems: a systematic review</article-title><source>Informatics (MDPI)</source><year>2020</year><month>09</month><volume>7</volume><issue>3</issue><fpage>25</fpage><pub-id pub-id-type="doi">10.3390/informatics7030025</pub-id><pub-id pub-id-type="medline">33274178</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="web"><article-title>Standardized data: the OMOP common data model</article-title><source>Observational Health Data Sciences and Informatics</source><access-date>2025-06-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ohdsi.org/data-standardization/">https://www.ohdsi.org/data-standardization/</ext-link></comment></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mungmode</surname><given-names>A</given-names> </name><name name-style="western"><surname>Noor</surname><given-names>N</given-names> </name><name name-style="western"><surname>Weinstock</surname><given-names>RS</given-names> </name><etal/></person-group><article-title>Making diabetes electronic medical record data actionable: promoting benchmarking and population health improvement using the T1D Exchange Quality Improvement Portal</article-title><source>Clin Diabetes</source><year>2022</year><volume>41</volume><issue>1</issue><fpage>45</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.2337/cd22-0072</pub-id><pub-id pub-id-type="medline">36714251</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material 
id="app1"><label>Multimedia Appendix 1</label><p>Electronic health record&#x2013;based identification of a cohort of youth with type 1 diabetes.</p><media xlink:href="diabetes_v10i1e69142_app1.docx" xlink:title="DOCX File, 27 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Top 30 most important features for predicting 90-day percent change in glycated hemoglobin in youth with type 1 diabetes.</p><media xlink:href="diabetes_v10i1e69142_app2.docx" xlink:title="DOCX File, 30 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Sensitivity, specificity, positive predictive value, and negative predictive value of predicted versus true percent change in glycated hemoglobin for each fold of k-fold cross-validation.</p><media xlink:href="diabetes_v10i1e69142_app3.docx" xlink:title="DOCX File, 30 KB"/></supplementary-material></app-group></back></article>