<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Diabetes</journal-id><journal-id journal-id-type="publisher-id">diabetes</journal-id><journal-id journal-id-type="index">23</journal-id><journal-title>JMIR Diabetes</journal-title><abbrev-journal-title>JMIR Diabetes</abbrev-journal-title><issn pub-type="epub">2371-4379</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e85372</article-id><article-id pub-id-type="doi">10.2196/85372</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Predicting Treatment Failure With Sodium-Glucose Cotransporter-2 Inhibitors in People With Type 2 Diabetes: Novel Artificial Intelligence and Machine Learning Approach</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Kwak</surname><given-names>Doyoung</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Tan</surname><given-names>Xi</given-names></name><degrees>PharmD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Liang</surname><given-names>Yuanjie</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Swift</surname><given-names>Caroline</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Muhammad</surname><given-names>Chalak</given-names></name><degrees>MPH, MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Shi</surname><given-names>Xu</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Texas A&#x0026;M University</institution><addr-line>College Station</addr-line><addr-line>TX</addr-line><country>United States</country></aff><aff id="aff2"><institution>Novo Nordisk Inc</institution><addr-line>800 Scudders Mill Road</addr-line><addr-line>Plainsboro</addr-line><addr-line>NJ</addr-line><country>United States</country></aff><aff id="aff3"><institution>University of Michigan</institution><addr-line>Ann Arbor</addr-line><addr-line>MI</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Steenstra</surname><given-names>Ivan</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Lu</surname><given-names>Jingyi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Ma</surname><given-names>Jiahui</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Yu</surname><given-names>Xia</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Xi Tan, PharmD, PhD, Novo Nordisk Inc, 800 Scudders Mill Road, Plainsboro, NJ, 08536, United States; <email>MXTZ@novonordisk.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date 
pub-type="epub"><day>20</day><month>5</month><year>2026</year></pub-date><volume>11</volume><elocation-id>e85372</elocation-id><history><date date-type="received"><day>09</day><month>10</month><year>2025</year></date><date date-type="rev-recd"><day>13</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>26</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Doyoung Kwak, Xi Tan, Yuanjie Liang, Caroline Swift, Chalak Muhammad, Xu Shi. Originally published in JMIR Diabetes (<ext-link ext-link-type="uri" xlink:href="https://diabetes.jmir.org">https://diabetes.jmir.org</ext-link>), 20.5.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Diabetes, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://diabetes.jmir.org/">https://diabetes.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://diabetes.jmir.org/2026/1/e85372"/><abstract><sec><title>Background</title><p>The rate of treatment failure with sodium-glucose cotransporter-2 inhibitors (SGLT2i) is high among individuals with type 2 diabetes (T2D). 
Accurately predicting SGLT2i treatment failure is important for improving the clinical management of T2D.</p></sec><sec><title>Objective</title><p>The study aimed to use machine learning (ML) models to identify factors predicting treatment failure with SGLT2i in T2D and to evaluate model performance.</p></sec><sec sec-type="methods"><title>Methods</title><p>This retrospective observational cohort study included adults with T2D treated with SGLT2i (2016-2024). The primary outcome was overall treatment failure with SGLT2i during follow-up (&#x2265;180 days after SGLT2i initiation). The secondary outcome was subtypes of treatment failure with SGLT2i (treatment discontinuation, failure with action, and inertial failure) or nonfailure, which was defined as not meeting the definition for any of the failure subtypes. Variables potentially associated with treatment failure were assessed during the year before SGLT2i treatment initiation (analysis 1) and the year before SGLT2i treatment failure (analysis 2). Using these variables, ML models&#x2014;logistic regression (LR), multilayer perceptron (MLP), extreme gradient boosting (XGBoost), and Transformer&#x2014;were used to identify significant predictors of the outcomes. Model performance metrics (accuracy, area under the curve, precision, recall, and <italic>F</italic><sub>1</sub>-score) were calculated. Using Shapley Additive Explanations methodology, key features were identified based on their impact on model predictions. LR and Transformer models using key features were further evaluated for their potential to support the development of a risk score for predicting treatment failure with SGLT2i.</p></sec><sec sec-type="results"><title>Results</title><p>Among all individuals in the study (N=62,222), 71% (n=44,156) had treatment failure with SGLT2i. Across subtypes, failure with action (n=23,839, 38.3%) was more common than treatment discontinuation (n=16,449, 26.4%) and inertial failure (n=3868, 6.2%). 
Model performance was moderate in both analyses. In analysis 1, the accuracy ranged from 0.72 to 0.73 for predicting overall treatment failure and from 0.56 to 0.57 for predicting the subtype of treatment failure. In analysis 2, the accuracy ranged from 0.74 to 0.75 for predicting overall treatment failure and from 0.61 to 0.63 for predicting the subtype of treatment failure. XGBoost, MLP, and Transformer models showed small improvements compared with LR. Using the top 9 key features identified from the Shapley Additive Explanations analysis, the Transformer model performed similarly in accuracy and area under the curve to its counterpart using the full feature set.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Performance across the LR, MLP, XGBoost, and Transformer models was moderate. The advanced ML models performed slightly better than LR. Overall, the results suggest that further model advancements and increased data availability are needed to better predict treatment failure with SGLT2i. The LR coefficients from the key features model may inform the development of a risk score to predict SGLT2i treatment failure. Accurate prediction could inform individualized treatment planning for individuals with T2D.</p></sec></abstract><kwd-group><kwd>sodium-glucose cotransporter-2 inhibitors</kwd><kwd>SGLT2i</kwd><kwd>type 2 diabetes</kwd><kwd>artificial intelligence</kwd><kwd>machine learning</kwd><kwd>treatment failure</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Type 2 diabetes (T2D), a chronic and progressive metabolic condition caused by inefficient production or use of insulin and identified by elevated blood glucose levels, affects more than 35 million adults in the United States [<xref ref-type="bibr" rid="ref1">1</xref>]. 
Sodium-glucose cotransporter-2 inhibitors (SGLT2i), a commonly prescribed therapy to reduce blood glucose levels in people with T2D, promote the excretion of glucose and reduce its reabsorption into the bloodstream [<xref ref-type="bibr" rid="ref2">2</xref>]. SGLT2i also have additional benefits beyond glycemic control, including renal and cardiovascular benefits [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. The American Diabetes Association recommends SGLT2i for individuals with T2D and chronic kidney disease (CKD) or kidney damage, as well as those with T2D and heart failure [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>Despite the benefits of SGLT2i in managing T2D, treatment failure rates are high among individuals in real-world settings [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>], particularly among those with comorbidities [<xref ref-type="bibr" rid="ref9">9</xref>]. Treatment failure is a multifaceted problem, and there is currently no consensus on the definition of treatment failure with SGLT2i in people with T2D. Typical components of treatment failure may include discontinuation, switching to other glucose-lowering therapies (GLT), and treatment intensification [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. 
Treatment failure with SGLT2i may also be characterized by not reaching the desired treatment goal after a certain amount of time or the attenuation of SGLT2i benefits over time [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Previous research has identified several factors associated with SGLT2i treatment discontinuation, including older age, specific comorbidities (eg, ischemic heart disease, chronic obstructive pulmonary disease, CKD, and cancer), the type of other GLT used at baseline, adverse effects, lack of efficacy, and financial reasons [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. However, the literature lacks a comprehensive assessment of the factors contributing to treatment failure with SGLT2i or its different components, and identifying individuals at higher risk of treatment failure with SGLT2i can be clinically challenging. Identifying individuals for whom SGLT2i treatment is likely to fail has implications for treatment planning and clinical management and, ultimately, the achievement of treatment goals. The prediction of treatment failure with SGLT2i would enable health care professionals to take actions to reduce the likelihood of treatment failure or plan for possible failure based on the individual&#x2019;s unique set of characteristics and circumstances.</p><p>Advanced machine learning (ML) models could potentially improve the prediction of treatment failure with SGLT2i in individuals with T2D. By analyzing data on many demographic characteristics, clinical parameters, laboratory values, and treatment histories, advanced ML models may offer advantages over conventional statistical methods for identifying factors and patterns that predict complex, multifaceted outcomes. 
In contrast to traditional techniques, which often rely on expert-driven a priori feature selection, modern algorithms such as gradient boosting and deep neural networks can automatically screen hundreds of candidate variables, rank their relative importance, and surface nonobvious predictors with minimal manual effort [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Additionally, by incorporating a broad array of variables, advanced ML models can potentially improve the accuracy and precision of outcome predictions as their flexible architectures capture complex nonlinear relationships and high-order interactions that conventional techniques, such as standard regression frameworks, struggle to represent without extensive handcrafting.</p><p>This study aimed to develop and internally validate ML models to predict treatment failure with SGLT2i among people with T2D using linked electronic health records (EHRs) and claims data. Specifically, we aimed to use ML models to predict treatment failure with SGLT2i and its subtypes.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design and Data Source</title><p>This retrospective observational cohort study included adults with T2D treated with SGLT2i. Data were obtained from Optum deidentified Market Clarity Data (Optum Market Clarity), comprising EHRs linked to medical and pharmacy claims from Optum-affiliated payers and additional third-party claims. The database contains records on more than 86 million US people and is considered nationally representative of the US population.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study was conducted in accordance with the Declaration of Helsinki of 1975 and its subsequent revisions. 
The study database is Health Insurance Portability and Accountability Act (HIPAA)&#x2013;compliant and deidentified, and data were analyzed without reidentification of or contact with study participants. Because this study used only previously collected, deidentified data, informed consent from study participants was not necessary. The WCG Institutional Review Board determined this study to be exempt under 45 Code of Federal Regulations (CFR) &#x00A7; 46.104(d)(4) [<xref ref-type="bibr" rid="ref15">15</xref>].</p></sec><sec id="s2-3"><title>Data Selection</title><p>The study period ran from January 1, 2016, through September 30, 2024. The index date was defined as the date of the first eligible SGLT2i pharmacy claim. The period for identifying eligible individuals for the study ran from December 31, 2016, through April 3, 2024, to include 1 year before the index date and a 180-day outcome assessment period (the interval from the index date to the end of the follow-up). Individuals were followed from the index date to the end of continuous claims enrollment, death, or the end of the study period, whichever occurred first. Two analyses, corresponding to two predictor lookup periods, were conducted to examine predictors of treatment failure with SGLT2i over two periods: (1) the year before SGLT2i treatment initiation (ie, analysis 1; <xref ref-type="fig" rid="figure1">Figure 1</xref>); and (2) the year before treatment failure with SGLT2i (ie, analysis 2; <xref ref-type="fig" rid="figure1">Figure 1</xref>). It was important to assess both time periods for predictors, as each can offer distinct, yet complementary, information. 
Analysis 1 focuses on predictors that can inform clinical decision-making before the initiation of treatment, whereas analysis 2 focuses on predictors more proximal to treatment failure, potentially informing clinical decision-making after treatment initiation.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Study schematics. (A) Schematic of analysis 1; (B) schematic of analysis 2; and (C) schematic of data preparation, optimization, prediction, and evaluation steps. SGLT2i: sodium-glucose cotransporter-2 inhibitors; d: days; y: year.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="diabetes_v11i1e85372_fig01.png"/></fig></sec><sec id="s2-4"><title>Inclusion and Exclusion Criteria</title><p>Individuals were eligible for inclusion in the study if they were newly initiated on an SGLT2i treatment (bexagliflozin, canagliflozin, dapagliflozin, empagliflozin, and ertugliflozin) between December 31, 2016, and April 3, 2024, and had 2 or more diagnoses of T2D on 2 or more distinct days during the study period, with the first T2D diagnosis on or before the index date. Eligible individuals also had continuous claims enrollment in the predictor lookup and outcome assessment periods, were aged 18 years or older on the index date, had 1 or more glycated hemoglobin (HbA<sub>1c</sub>) value &#x2265;7% during the baseline period or on the index date, and had 1 or more HbA<sub>1c</sub> value from 180 days after the index date until the end of follow-up. 
Individuals were excluded if they had 1 or more diagnoses of type 1 diabetes during the study period, had missing data for sex, initiated another new GLT drug class on the index date, used any glucagon-like peptide-1 receptor agonist (GLP-1 RA) obesity medications during the study period, had evidence of pregnancy, or had CKD stage 5 or end-stage kidney disease during the baseline period or on the index date (Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s2-5"><title>Outcomes</title><p>The study outcome was treatment failure with SGLT2i, adopted from our previous work [<xref ref-type="bibr" rid="ref8">8</xref>], which was defined as the occurrence of any of the following events during the outcome assessment period: initiation of a new GLT class (&#x201C;failure with action&#x201D;), discontinuation of SGLT2i (&#x201C;discontinuation&#x201D;), or HbA<sub>1c</sub> not under control (&#x201C;inertial failure&#x201D;). Initiation of a new GLT class was defined as any pharmacy claim for a new GLT class (insulin, thiazolidinediones, sulfonylureas, GLP-1 RA, biguanides, meglitinides, alpha-glucosidase inhibitors, or dipeptidyl peptidase 4 inhibitors) during the follow-up period, including either switching to or adding a new GLT class. Discontinuation of SGLT2i was defined as a gap of 90 or more days for any SGLT2i during the follow-up period. Switching within an SGLT2i class was not considered discontinuation. HbA<sub>1c</sub> was considered uncontrolled if 2 or more consecutive HbA<sub>1c</sub> laboratory values were 8% or more on distinct days (&#x2265;90 days apart) between the index date plus 180 days and the end of the follow-up period. Nonfailure was defined as not meeting the definition for any of the failure subtypes. The primary outcome in this study was a binary variable for the occurrence of treatment failure with SGLT2i (treatment failure did or did not occur). 
The secondary outcome was a multiclass variable for the subtypes of treatment failure&#x2014;failure with action, treatment discontinuation, or inertial failure. Additional details regarding the definitions of treatment failure with SGLT2i have been previously described [<xref ref-type="bibr" rid="ref8">8</xref>].</p></sec><sec id="s2-6"><title>Predictors</title><p>Potential predictors of treatment failure with SGLT2i included index year, demographic characteristics (age, sex, race, and geographic region), clinical characteristics (BMI [&#x003C;25, 25&#x2010;29.9, 30&#x2010;34.9, 35&#x2010;39.9, or &#x2265;40 kg/m<sup>2</sup>], kidney function and CKD [stage 1, 2, 3, or 4], and time from first observed T2D diagnosis to the index date), laboratory tests (HbA<sub>1c</sub>, low-density lipoprotein cholesterol, high-density lipoprotein cholesterol, very low-density lipoprotein cholesterol, total cholesterol, triglycerides, and C-reactive protein), and vital signs (systolic and diastolic blood pressure measurements; Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The following baseline comorbidities were also included: atrial fibrillation, ischemic heart disease, deep vein thrombosis or pulmonary embolism, hypertension, dyslipidemia, components of atherosclerotic cardiovascular disease, myocardial infarction, other coronary heart disease, peripheral artery disease, metabolic dysfunction&#x2013;associated steatohepatitis or metabolic dysfunction&#x2013;associated steatotic liver disease, obesity, anxiety, depression, asthma, musculoskeletal pain, and osteoarthritis. 
Other predictors included the Charlson Comorbidity Index adjusted without diabetes [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>], the Diabetes Complications Severity Index [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], tobacco smoking status, GLT use, selected non-GLT treatment use, health care resource utilization (all-cause and T2D-related hospitalizations and emergency department visits), and health care costs (for all-cause and T2D-related hospitalizations and emergency department visits). Detailed information regarding predictor construction can be found in Tables S2-S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-7"><title>Analysis</title><sec id="s2-7-1"><title>Model Overview and Performance Evaluation</title><p>For analyses 1 and 2, descriptive statistics (mean and SD for continuous variables and counts and percentages for categorical variables) were calculated for demographic characteristics, baseline comorbidities, clinical characteristics, laboratory tests, vital signs, GLT use, selected non-GLT treatment use, health care resource utilization, and health care costs. For both analyses, we evaluated the performance of 4 models&#x2014;logistic regression (LR), multilayer perceptron (MLP), extreme gradient boosting (XGBoost), and Transformer&#x2014;to predict treatment failure with SGLT2i. LR, a traditional statistical method known for its simplicity and interpretability, is suitable for binary classification tasks [<xref ref-type="bibr" rid="ref20">20</xref>]. MLP, XGBoost, and Transformer are advanced ML models. MLP is a feedforward artificial neural network useful for capturing nonlinear relationships through hidden layers [<xref ref-type="bibr" rid="ref21">21</xref>]. 
XGBoost is a gradient boosting framework known for its efficiency and accuracy in handling structured data and managing feature interactions [<xref ref-type="bibr" rid="ref22">22</xref>]. Transformer leverages self-attention mechanisms to process data efficiently and capture complex patterns in diverse datasets [<xref ref-type="bibr" rid="ref23">23</xref>]. For all analyses, we split the data with 80% as a training set (including model optimization) and 20% as a test set for internal validation. After training and optimization on the training set, we used the test set to evaluate the performance of each model and generate confusion matrices [<xref ref-type="bibr" rid="ref24">24</xref>]. Model performance was evaluated based on accuracy, receiver operating characteristic area under the curve (ROC AUC), precision, recall, and <italic>F</italic><sub>1</sub>-score [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Precision, recall, and <italic>F</italic><sub>1</sub>-score were computed for the positive class for the binary outcome, and support-weighted averaging was used for the multiclass outcome to account for class imbalance. Analyses were conducted in Python (version 3.10; Python Software Foundation) using <italic>scikit-learn</italic>, XGBoost, and PyTorch, along with supporting libraries such as <italic>NumPy</italic> and <italic>Pandas</italic>.</p></sec><sec id="s2-7-2"><title>Data Preparation and Handling of Missing Data</title><p>An illustration of the data preparation and processing workflow for each model (LR, MLP, XGBoost, and Transformer) is provided in Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. In preparing the dataset for the LR model, categorical variables were factorized and transformed into integers based on their unique categories. 
Numerical variables were normalized using MinMax scaling and centered at the midpoint before model fitting, ensuring all variables were on a comparable scale. Missing values were imputed with 0 on the transformed feature scale, thereby providing a neutral or reference value; because the data were transformed before imputation, an imputed 0 did not indicate an actual clinical measurement of 0. Missing values across all variables (categorical and numerical) were filled with 0 as a consistent approach to minimize their impact on model parameters.</p><p>For the MLP model, categorical variables were embedded into vectors, and missing values were filled with 0. Numerical variables were normalized using MinMax scaling. Because a significant number of numerical variables naturally had 0 values, missing numerical values were not filled with 0. To distinguish between actual 0 values and missing numerical values, a masking strategy was used to flag missing data, with each numerical vector concatenated with a binary mask to indicate missingness. This approach ensured that data abnormalities were managed without distortion and facilitated the MLP model&#x2019;s ability to integrate both categorical embeddings and numerical inputs efficiently.</p><p>For the XGBoost model, categorical variables in the dataset were converted to a string format to ensure consistent handling, using XGBoost&#x2019;s built-in method of setting &#x201C;unknown&#x201D; values to missing. XGBoost inherently handles missing data by learning default directions for nodes in decision trees and assigning optimal paths for instances with missing values during training. This approach simplifies data preparation and maintains performance without requiring imputation of missing values.</p><p>For the Transformer model, categorical variables were processed through embeddings, converting them into dense vector representations, with any missing values filled with 0. 
As with the MLP model, numerical variables were normalized using MinMax scaling, and missing values were handled by creating a mask to indicate the presence of missing data. This approach to missingness also facilitated the Transformer&#x2019;s ability to use information on both missing and nonmissing values effectively. By integrating positional encoding, the Transformer model encapsulates categorical and numerical features and their complex interactions, enhancing its predictive capabilities on tabular data.</p><p>In routinely collected EHRs or claims data, missing data are often informative because they are driven by care processes rather than random measurement error [<xref ref-type="bibr" rid="ref27">27</xref>]. We did not use multiple imputation to impute missing data for any model because multiple imputation relies on strong assumptions (eg, missing-at-random) and a well-specified imputation model. Applying multiple imputation indiscriminately when missingness is structurally driven by routine care may introduce bias or clinically implausible values [<xref ref-type="bibr" rid="ref28">28</xref>].</p></sec><sec id="s2-7-3"><title>Parameterization</title><p>For the MLP and Transformer models, hyperparameter optimization, including tuning learning rate, batch size, number of epochs, and architecture, was conducted using a representative subset of 3000 individuals from the training dataset. This subset of 3000 individuals was randomly sampled from the training set to reduce the substantial computational burden associated with hyperparameter optimization for advanced ML models. The optimized hyperparameters were then applied to train the models on the full training dataset [<xref ref-type="bibr" rid="ref29">29</xref>]. For the XGBoost model, optimization was performed through 5-fold cross-validation on the training set, evaluating accuracy and <italic>F</italic><sub>1</sub>-scores across combinations of hyperparameters (eg, gamma, maximum depth, number of estimators, and learning rate). 
The optimal set of hyperparameters was selected based on cross-validation results, and the final model was retrained on the full training data before evaluation on the test set. The Appendix provides details on the optimization strategy (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), optimized hyperparameters (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), and final training iterations or epochs on the full training split (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) for each model. The same hyperparameter sets and optimization strategies were applied in analyses 1 and 2.</p></sec><sec id="s2-7-4"><title>Feature Importance</title><p>Feature importance scores were derived to identify the contribution of individual variables in predicting SGLT2i treatment failure. Model-specific techniques were used to assess feature importance for each model. In the LR model, we compared the absolute values of coefficients and sorted them by magnitude to determine each feature&#x2019;s relative influence. In the XGBoost model, the model&#x2019;s internal function was used for importance calculation, focusing on metrics such as gain, cover, and frequency. &#x201C;Gain&#x201D; indicates the contribution of a feature to the model&#x2019;s predictive capabilities, &#x201C;cover&#x201D; assesses the number of observations affected by splits on the feature, and &#x201C;frequency&#x201D; counts the occurrences of a feature in decision trees, thereby revealing the feature&#x2019;s significance. In the Transformer model, a Shapley Additive Explanations (SHAP) analysis was conducted to interpret feature importance [<xref ref-type="bibr" rid="ref30">30</xref>]. SHAP attributes the contribution of each feature according to its impact on model predictions. 
To enhance clinical utility, defined as the extent to which a predictive model informs clinical decision-making and actions to improve patient care, and support individualized treatment strategies, we identified key features based on their importance values in the best models. We ranked candidate predictors by their mean absolute SHAP value (global importance) and then selected features with clinical utility for further analysis to inform the development of a risk score or algorithm to predict treatment failure of SGLT2i. Upon conducting model comparisons, it was observed that the Transformer model performed slightly better than the other models while maintaining stability. Therefore, we focused on the Transformer model for key feature identification.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Baseline Characteristics</title><p>A total of 62,222 individuals with T2D who initiated treatment with SGLT2i during the study period were included in the analyses (Tables S5 and S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). More than half of the sample (n=35,487, 57%) was male, the mean (SD) age was 62.7 (12.0) years, and most individuals were White (n=44,774, 72%). Most individuals had commercial insurance (n=27,378, 44%) or Medicare (n=29,882, 48%). The most common comorbidities were hyperlipidemia (n=50,851, 81.7%), hypertension (n=50,681, 81.5%), and obesity (n=28,374, 45.6%; Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Overall, 71% (n=44,156) of individuals in the study experienced treatment failure with SGLT2i. 
When subtypes were assessed, failure with action (n=23,839, 38.3%) was more common than discontinuation (n=16,449, 26.4%) and inertial failure (n=3868, 6.2%; Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s3-2"><title>Features Analysis and Model Evaluation Results</title><p>In both analyses 1 and 2, using the full dataset, model performance was moderate overall (<xref ref-type="table" rid="table1">Table 1</xref>). Compared with the LR model, the Transformer, XGBoost, and MLP models showed small improvements (<xref ref-type="table" rid="table1">Table 1</xref>). In analysis 1, accuracy across models ranged from 0.72 to 0.73 for overall failure or not (binary outcome) and from 0.56 to 0.57 for the subtypes of failure (multiclass outcome). ROC AUC across models ranged from 0.69 to 0.70 for overall failure and from 0.63 to 0.64 for the failure subtypes (<xref ref-type="fig" rid="figure2">Figure 2</xref>). In analysis 2, accuracy across models ranged from 0.74 to 0.75 for overall failure and from 0.61 to 0.63 for the failure subtypes. ROC AUC ranged from 0.74 to 0.75 for overall failure and from 0.72 to 0.75 for the failure subtypes (<xref ref-type="fig" rid="figure3">Figure 3</xref>). Similar patterns were observed for precision, recall, and <italic>F</italic><sub>1</sub>-score estimates in both analyses 1 and 2. 
Notably, models developed for analysis 2 consistently outperformed those from analysis 1, particularly in the subtypes of the failure prediction task (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance metrics for models used to predict treatment failure with SGLT2i<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> among people with type 2 diabetes.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model</td><td align="left" valign="top">Accuracy</td><td align="left" valign="top">ROC AUC<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">Precision</td><td align="left" valign="top">Recall</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="6">Analysis 1<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Overall failure (failure vs not)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.69</td><td align="left" valign="top">0.73</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.83</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="top">0.72</td><td align="left" 
valign="top">0.69</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.82</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup></td><td align="left" valign="top">0.73</td><td align="left" valign="top">0.70</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.95</td><td align="left" valign="top">0.83</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transformer</td><td align="left" valign="top">0.73</td><td align="left" valign="top">0.70</td><td align="left" valign="top">0.73</td><td align="left" valign="top">0.96</td><td align="left" valign="top">0.83</td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Subtype of failure (discontinuation, failure with action, and inertial failure) or nonfailure</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.53</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.50</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.63</td><td align="left" 
valign="top">0.50</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.46</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.64</td><td align="left" valign="top">0.53</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.50</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transformer</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.57</td><td align="left" valign="top">0.49</td></tr><tr><td align="left" valign="top" colspan="6">Analysis 2<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Overall failure (failure vs not)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.76</td><td align="left" valign="top">0.93</td><td align="left" valign="top">0.84</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.76</td><td align="left" 
valign="top">0.93</td><td align="left" valign="top">0.84</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.77</td><td align="left" valign="top">0.92</td><td align="left" valign="top">0.84</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transformer</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.77</td><td align="left" valign="top">0.91</td><td align="left" valign="top">0.83</td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Subtype of failure (discontinuation, failure with action, and inertial failure) or nonfailure</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.73</td><td align="left" valign="top">0.60</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.59</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MLP</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.60</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.59</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.62</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.61</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Transformer</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.73</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.61</td><td align="left" valign="top">0.60</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>SGLT2i: sodium-glucose cotransporter-2 inhibitor.</p></fn><fn id="table1fn2"><p><sup>b</sup>ROC AUC: receiver operating characteristic area under the curve.</p></fn><fn id="table1fn3"><p><sup>c</sup>Analyses were conducted to examine predictors of treatment failure with SGLT2i over two periods: (1) the year before SGLT2i treatment initiation (ie, analysis 1) and (2) the year before treatment failure with SGLT2i (ie, analysis 2).</p></fn><fn id="table1fn4"><p><sup>d</sup>LR: logistic regression.</p></fn><fn id="table1fn5"><p><sup>e</sup>MLP: multilayer perceptron.</p></fn><fn id="table1fn6"><p><sup>f</sup>XGBoost: extreme gradient boosting.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Receiver operating characteristic (ROC) curves (analysis 1). AUC: area under the curve; XGBoost: extreme gradient boosting. Models used to predict overall failure: (A) logistic regression; (B) multilayer perceptron; (C) XGBoost; (D) Transformer. 
Models used to predict failure subtype: (E) logistic regression; (F) multilayer perceptron; (G) XGBoost; (H) Transformer.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="diabetes_v11i1e85372_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Receiver operating characteristic (ROC) curves (analysis 2). AUC: area under the curve; XGBoost: extreme gradient boosting. Models used to predict overall failure: (A) logistic regression; (B) multilayer perceptron; (C) XGBoost; (D) Transformer. Models used to predict failure subtype: (E) logistic regression; (F) multilayer perceptron; (G) XGBoost; (H) Transformer.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="diabetes_v11i1e85372_fig03.png"/></fig></sec><sec id="s3-3"><title>Feature Importance and Key Features Analysis</title><p>The SHAP analysis based on the Transformer model uncovered consistent feature importance patterns across analyses 1 and 2. Index year, HbA<sub>1c</sub>, and use of GLP-1 RA at baseline exhibited the highest importance scores in both analyses. Additionally, comparative examination of mean SHAP values and visualizations for the remaining individual features revealed similarities across the 2 analyses, with nuanced variations in the relative magnitude of these features. Analysis 2 also identified features after SGLT2i initiation, such as having experienced a urinary tract infection (<xref ref-type="fig" rid="figure4">Figure 4</xref>).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Transformer model in analysis 1 and analysis 2: feature importance bar plot from Shapley Additive Explanations (SHAP) analysis. (A) Analysis 1. Mean SHAP Value (Importance Score); (B) Analysis 2. Mean SHAP Value (Importance Score). 
CCI: Charlson Comorbidity Index; CKD: chronic kidney disease; DCSI: Diabetes Complication Severity Index; DPP4i: dipeptidyl peptidase 4 inhibitor; GLP-1 RA: glucagon-like peptide-1 receptor agonist; GLT: glucose-lowering therapy; HbA<sub>1c</sub>: glycated hemoglobin; UTI: urinary tract infection.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="diabetes_v11i1e85372_fig04.png"/></fig><p>We identified key features for further analysis to support the development of a risk score or algorithm to predict treatment failure with SGLT2i. Because the index year, regarded as a proxy for follow-up time, may not be useful in clinical practice given the study period of this particular analysis and its potential limited utility in clinical decision-making, we excluded it from further analysis of key features. We tested both Transformer and LR models to evaluate their effectiveness using the remaining 9 top features (<xref ref-type="fig" rid="figure4">Figure 4</xref>): HbA<sub>1c</sub>&#x2265;8% during the baseline period or before the index date, use of GLP-1 RA during the baseline period, HbA<sub>1c</sub>&#x2265;7% during the baseline period or before the index date, use of insulin during the baseline period, latest HbA<sub>1c</sub> during the predictor lookup period before the outcome date, payer type, use of biguanides during the baseline period, age at index date (group), and kidney function and CKD stage from the index date to the outcome date. The number of key features was not determined by a statistical SHAP threshold; rather, it was chosen pragmatically to support clinical utility by limiting the number of required inputs and enabling a parsimonious model that could be operationalized as a simple risk score. 
Using the 9 selected key features, the Transformer model had slightly higher accuracy (0.75 vs 0.74) and recall (0.98 vs 0.97) and the same ROC AUC (0.70), precision (0.75), and <italic>F</italic><sub>1</sub>-score (0.85) compared with the LR model (<xref ref-type="table" rid="table2">Table 2</xref>). The coefficients from the LR model with the 9 key features are presented in <xref ref-type="table" rid="table3">Table 3</xref> and may inform further development of a risk score or algorithm to conveniently predict treatment failure with SGLT2i in routine clinical practice.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance from the logistic regression and Transformer models with 9 key features.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model</td><td align="left" valign="top">Accuracy</td><td align="left" valign="top">ROC AUC<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">Precision</td><td align="left" valign="top">Recall</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="top">LR model with 9 key features</td><td align="left" valign="top">0.74</td><td align="left" valign="top">0.70</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.97</td><td align="left" valign="top">0.85</td></tr><tr><td align="left" valign="top">Transformer model with 9 key features</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.70</td><td align="left" valign="top">0.75</td><td align="left" valign="top">0.98</td><td align="left" valign="top">0.85</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>ROC AUC: receiver operating characteristic area under the curve.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>The coefficients from the logistic 
regression (LR) model with the 9 key features.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Key feature</td><td align="left" valign="top">LR coefficient (log odds)</td></tr></thead><tbody><tr><td align="left" valign="top">HbA<sub>1c</sub><sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> &#x2265;8% during the baseline period or before index date</td><td align="char" char="." valign="top">0.89</td></tr><tr><td align="left" valign="top">Use of GLP-1 RA<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> during the baseline period</td><td align="char" char="." valign="top">&#x2013;0.50</td></tr><tr><td align="left" valign="top">HbA<sub>1c</sub> &#x2265;7% during the baseline period or before index date</td><td align="char" char="." valign="top">&#x2013;0.40</td></tr><tr><td align="left" valign="top">Use of insulin during the baseline period</td><td align="char" char="." valign="top">&#x2013;0.33</td></tr><tr><td align="left" valign="top">Latest HbA<sub>1c</sub> during the predictor lookup period before outcome date</td><td align="char" char="." valign="top">0.16</td></tr><tr><td align="left" valign="top">Payer</td><td align="char" char="." valign="top">&#x2013;0.15</td></tr><tr><td align="left" valign="top">Use of biguanides during the baseline period</td><td align="char" char="." valign="top">0.14</td></tr><tr><td align="left" valign="top">Age at index date (group)</td><td align="char" char="." valign="top">&#x2013;0.02</td></tr><tr><td align="left" valign="top">Kidney function and CKD<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> stage from the index date to the outcome date</td><td align="char" char="." valign="top">0.02</td></tr><tr><td align="left" valign="top">Constant (intercept)</td><td align="char" char="." 
valign="top">&#x2013;0.49</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>HbA<sub>1c</sub>: glycated hemoglobin.</p></fn><fn id="table3fn2"><p><sup>b</sup>GLP-1 RA: glucagon-like peptide-1 receptor agonist.</p></fn><fn id="table3fn3"><p><sup>c</sup>CKD: chronic kidney disease.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This real-world, observational study showed that treatment failure with SGLT2i is prevalent among people with T2D in the United States, which is consistent with our previous work [<xref ref-type="bibr" rid="ref8">8</xref>]. We found that ML models produce modest performance improvements relative to traditional LR for predicting treatment failure with SGLT2i. Furthermore, we identified 9 predictors that may be particularly influential in predicting treatment failure with SGLT2i, including those readily available in EHR for individuals with T2D, such as HbA<sub>1c</sub> levels, use of GLT, and kidney function. This study&#x2019;s strengths included using a nationally representative US database with a large population and examining predictors in 2 reference periods&#x2014;the year before treatment initiation with SGLT2i and the year before treatment failure with SGLT2i. This study also used advanced ML approaches to capture complex, nonlinear relationships and automatically identify important predictors from high-dimensional clinical data. Feature importance analysis was conducted to better explain the prediction models and enhance their utility in clinical practice.</p><p>This study provides important information for health care professionals on the high treatment failure rate with SGLT2i among people with T2D in real-world settings and the key predictors that may increase the risk of treatment failure. 
Overall, across analyses 1 and 2, model performance was moderate [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Although improvement in model performance is desirable, the findings suggest that advanced ML models, including MLP, XGBoost, and Transformer, can learn relevant patterns from the data. Furthermore, in analyses 1 and 2, these advanced ML models showed modest improvements in performance metrics over traditional LR, indicating a slightly better ability to capture nuanced patterns. The superior model performance in analysis 2 was likely due to postindex data providing information more proximal to treatment failure with SGLT2i, enabling more precise predictions. Having information post-SGLT2i initiation (index date), especially on potential SGLT2i treatment-related adverse events, may help boost predictive accuracy.</p><p>Building on the analyses with the full feature set, we examined the LR and Transformer models using a reduced set of 9 features selected based on importance scores. Both models demonstrated comparable performance, with scores reaching moderate levels overall. Notably, these models performed similarly to those using the full feature set, underscoring the potential utility of parsimonious models incorporating influential predictors. In the context of T2D and SGLT2i treatment, feature ranking facilitates the identification of the most influential predictors for determining treatment failure with SGLT2i. 
From a clinical perspective, ML and strategic feature selection could empower health care professionals by helping them identify individuals at the highest risk of treatment failure and other treatment-related outcomes, such as medication nonadherence, for further treatment planning or intervention [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>Although ML models hold promise for clinical prediction, data quality issues, the need for large datasets, costs, and implementation challenges in real-world settings impede the uptake of ML-based prediction models in clinical practice [<xref ref-type="bibr" rid="ref36">36</xref>]. LR is commonly used to predict clinical outcomes, including treatment failure, in clinical practice settings due to its familiarity among health care professionals and its ability to handle binary outcomes. Coefficients from LR models offer insight into the relationship between a set of predictors and the likelihood of treatment failure. Coefficients from the LR model used in our study could inform the development of a future prediction tool to estimate the likelihood of treatment failure with SGLT2i among individuals with T2D. Until such a prediction tool is available, the key features identified in this study (eg, HbA<sub>1c</sub>&#x2265;7%, payer type, or use of insulin, biguanides, or GLP-1 RA before SGLT2i initiation) could be of value for clinical decision-making to reduce treatment failure among people with T2D.</p><p>Future research is needed to refine and validate ML models across populations and settings to increase the applicability of ML-based predictive results to clinical practice in T2D while addressing deficiencies in data quality and quantity [<xref ref-type="bibr" rid="ref37">37</xref>]. 
Specifically, there is a need to increase the breadth of predictors to encompass health behaviors, family history, genetic information, social determinants of health, patient-reported outcomes, and comprehensive laboratory data. Concurrently, data quality must also be addressed to improve its accuracy and completeness, thereby enhancing the efficiency and utility of predictive analytics. Furthermore, although ML methodologies, including deep learning, have demonstrated efficacy in medical imaging, diagnosis, and early detection of disease or complications, their performance in predicting clinical outcomes is suboptimal. There is a need to develop ML models that can utilize complex, high-dimensional data to produce highly accurate, clinically relevant information [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. Given the modest performance gains observed, LR remains a strong baseline for clinical deployment due to its superior interpretability. Our results also indicate that the current data and composite end point likely cap performance across models, while advanced architectures may provide meaningful value with richer, longitudinal, or multimodal inputs for tasks closer to the failure event, as suggested by the consistently better performance in analysis 2. Finally, the 9-feature model serves as a pragmatic bridge to clinical use by retaining comparable performance with far greater simplicity.</p><p>This study had some limitations. First, although ML models can identify features strongly correlated with treatment failure with SGLT2i, these correlations do not necessarily imply causation. This is an inherent limitation of observational study design. Second, the datasets used to build and validate the models comprised administrative claims-linked EHR data, which were not collected for research purposes. 
These data may include potential inaccuracies in diagnostic coding, measurement error, and incomplete information on variables used in the predictive models. Different strategies for handling missing data may enable some models to use missingness patterns more directly than others, potentially influencing model performance. However, these strategies reflect inherent differences in model architecture and standard practice and are unlikely to have driven model performance because differences between advanced models and LR were small overall. Although explicit masking could confer an advantage for deep learning models, we observed only modest improvements. Given the large sample size and the substantial computational burden associated with training advanced models&#x2014;particularly XGBoost and the Transformer&#x2014;we used a fixed 80%/20% training/testing split for final model evaluation. Although performance metrics based on a single internal split may not fully capture variability across data partitions, this approach enabled consistent comparisons of performance. Regarding the design of predictors, we aimed at clinical interpretability, with conditions and drug utilization over a period of time summarized. A more complex design incorporating the longitudinal nature of predictors (ie, the timing and sequence) or more free-form predictors (eg, unlabeled diagnosis codes) may improve the predictive accuracy of some of the models but at a loss of clinical interpretation. Furthermore, the datasets used in this study did not contain data on certain factors that could be important for predicting treatment failure with SGLT2i, such as more granular clinical characteristics, behavioral factors, and social determinants of health. 
Furthermore, although this study&#x2019;s focus was a parsimonious, clinically implementable single-risk score, subtype-specific models could further disentangle underlying mechanisms of treatment failure with SGLT2i, which could be an important avenue for future research. Finally, this study focused on the glycemic control indication for T2D and did not include other indications for SGLT2i (eg, cardiovascular disease).</p><p>This study found a high prevalence of treatment failure with SGLT2i among people with T2D. In predicting overall treatment failure and subtype, model performance was moderate. Models based on the examination of predictors in the year before treatment failure with SGLT2i performed better than models based on the examination of predictors in the year before SGLT2i initiation. The results suggest that further advancements in ML models and additional data are needed to enhance ML-based prediction of treatment failure with SGLT2i. Feature importance analysis may support the development of a risk score or algorithm to inform more timely individualized treatment planning for individuals with T2D.</p></sec></body><back><ack><p>The authors thank Yiwen Cao for providing data management support. Generative artificial intelligence was not used in the preparation of this manuscript. The authors thank Benjamin Doty and Rebecca Hahn of KJT Group, Inc, for providing medical writing support, in accordance with Good Publication Practice and International Committee of Medical Journal Editors guidelines.</p></ack><notes><sec><title>Funding</title><p>This work was funded by Novo Nordisk Inc.</p></sec><sec><title>Data Availability</title><p>The datasets generated and/or analyzed in this study are not publicly available because they were commercially licensed from the data vendor. 
Restrictions apply to the availability of these data, which were used under license for this study.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: XT, YL, CS, CM, DK</p><p>Data curation: XT, DK</p><p>Formal analysis: DK</p><p>Investigation: XT, YL, CS, CM, DK, XS</p><p>Methodology: XT, YL, CS, CM, DK, XS</p><p>Project administration: XT</p><p>Visualization: XT, DK</p><p>Writing &#x2013; original draft: XT, YL, CS, CM, DK, XS</p><p>Writing &#x2013; review and editing: XT, YL, CS, CM, DK, XS</p></fn><fn fn-type="conflict"><p>XT, YL, CS, and CM are employees of Novo Nordisk Inc. YL, CS, and CM are shareholders of Novo Nordisk A/S. DK is a former employee of Novo Nordisk Inc. XS is a consultant for Novo Nordisk Inc.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CKD</term><def><p>chronic kidney disease</p></def></def-item><def-item><term id="abb2">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb3">GLP-1 RA</term><def><p>glucagon-like peptide-1 receptor agonist</p></def></def-item><def-item><term id="abb4">GLT</term><def><p>glucose-lowering therapy</p></def></def-item><def-item><term id="abb5">HbA<sub>1c</sub></term><def><p>glycated hemoglobin</p></def></def-item><def-item><term id="abb6">HIPAA</term><def><p>Health Insurance Portability and Accountability Act</p></def></def-item><def-item><term id="abb7">LR</term><def><p>logistic regression</p></def></def-item><def-item><term id="abb8">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb9">MLP</term><def><p>multilayer perceptron</p></def></def-item><def-item><term id="abb10">ROC AUC</term><def><p>receiver operating characteristic area under the curve</p></def></def-item><def-item><term id="abb11">SGLT2i</term><def><p>sodium-glucose cotransporter-2 inhibitor</p></def></def-item><def-item><term id="abb12">SHAP</term><def><p>Shapley Additive Explanations</p></def></def-item><def-item><term 
id="abb13">T2D</term><def><p>type 2 diabetes</p></def></def-item><def-item><term id="abb14">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>National Diabetes Statistics Report</article-title><source>Centers for Disease Control and Prevention</source><year>2024</year><access-date>2025-03-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/diabetes/php/data-research">https://www.cdc.gov/diabetes/php/data-research</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Klen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dol&#x017E;an</surname><given-names>V</given-names> </name></person-group><article-title>Treatment response to SGLT2 inhibitors: from clinical characteristics to genetic variations</article-title><source>Int J Mol Sci</source><year>2021</year><month>09</month><day>10</day><volume>22</volume><issue>18</issue><fpage>9800</fpage><pub-id pub-id-type="doi">10.3390/ijms22189800</pub-id><pub-id pub-id-type="medline">34575958</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Padda</surname><given-names>IS</given-names> </name><name name-style="western"><surname>Mahtani</surname><given-names>AU</given-names> </name><name name-style="western"><surname>Parmar</surname><given-names>M</given-names> </name></person-group><article-title>Sodium-glucose transport protein 2 (SGLT2) inhibitors</article-title><source>StatPearls</source><year>2025</year><access-date>2026-02-04</access-date><publisher-name>StatPearls Publishing</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK576405/">https://www.ncbi.nlm.nih.gov/books/NBK576405/</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Boer</surname><given-names>IH</given-names> </name><name name-style="western"><surname>Khunti</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sadusky</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Diabetes management in chronic kidney disease: a consensus report by the American Diabetes Association (ADA) and Kidney Disease: Improving Global Outcomes (KDIGO)</article-title><source>Diabetes Care</source><year>2022</year><month>12</month><day>1</day><volume>45</volume><issue>12</issue><fpage>3075</fpage><lpage>3090</lpage><pub-id pub-id-type="doi">10.2337/dci22-0027</pub-id><pub-id pub-id-type="medline">36189689</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>American Diabetes Association Professional Practice Committee</collab></person-group><article-title>9. 
Pharmacologic approaches to glycemic treatment: standards of care in diabetes&#x2014;2025</article-title><source>Diabetes Care</source><year>2025</year><month>01</month><day>1</day><volume>48</volume><issue>Supplement_1</issue><fpage>S181</fpage><lpage>S206</lpage><pub-id pub-id-type="doi">10.2337/dc25-S009</pub-id><pub-id pub-id-type="medline">39651989</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fadini</surname><given-names>GP</given-names> </name><name name-style="western"><surname>Tentolouris</surname><given-names>N</given-names> </name><name name-style="western"><surname>Caballero Mateos</surname><given-names>I</given-names> </name><name name-style="western"><surname>Bellido Casta&#x00F1;eda</surname><given-names>V</given-names> </name><name name-style="western"><surname>Morales Portillo</surname><given-names>C</given-names> </name></person-group><article-title>A multinational real-world study on the clinical characteristics of patients with type 2 diabetes initiating dapagliflozin in Southern Europe</article-title><source>Diabetes Ther</source><year>2020</year><month>02</month><volume>11</volume><issue>2</issue><fpage>423</fpage><lpage>436</lpage><pub-id pub-id-type="doi">10.1007/s13300-019-00744-6</pub-id><pub-id pub-id-type="medline">31863344</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Malik</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Falkentoft</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Discontinuation and reinitiation of SGLT-2 inhibitors and GLP-1R agonists in patients with type 2 diabetes: a nationwide study from 2013 to 
2021</article-title><source>Lancet Reg Health Eur</source><year>2023</year><month>06</month><volume>29</volume><fpage>100617</fpage><pub-id pub-id-type="doi">10.1016/j.lanepe.2023.100617</pub-id><pub-id pub-id-type="medline">37265783</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>X</given-names> </name><name name-style="western"><surname>Cao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Prevalence, treatment patterns, burden, and factors related to treatment failure with sodium-glucose cotransporter 2 inhibitor in adults with type 2 diabetes in the United States</article-title><source>Diabetes Metab Syndr</source><year>2025</year><month>07</month><volume>19</volume><issue>7</issue><fpage>103281</fpage><pub-id pub-id-type="doi">10.1016/j.dsx.2025.103281</pub-id><pub-id pub-id-type="medline">40819425</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kovesdy</surname><given-names>C</given-names> </name><name name-style="western"><surname>Schmedt</surname><given-names>N</given-names> </name><name name-style="western"><surname>Folkerts</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Predictors of cardio-kidney complications and treatment failure in patients with chronic kidney disease and type 2 diabetes treated with SGLT2 inhibitors</article-title><source>BMC Med</source><year>2022</year><month>01</month><day>10</day><volume>20</volume><issue>1</issue><fpage>2</fpage><pub-id pub-id-type="doi">10.1186/s12916-021-02191-2</pub-id><pub-id pub-id-type="medline">35000594</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mocarski</surname><given-names>M</given-names> </name><name name-style="western"><surname>Yeaw</surname><given-names>J</given-names> </name><name name-style="western"><surname>Divino</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Slow titration and delayed intensification of basal insulin among patients with type 2 diabetes</article-title><source>J Manag Care Spec Pharm</source><year>2018</year><month>04</month><volume>24</volume><issue>4</issue><fpage>390</fpage><lpage>400</lpage><pub-id pub-id-type="doi">10.18553/jmcp.2017.17218</pub-id><pub-id pub-id-type="medline">29406841</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singhal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>H</given-names> </name><name name-style="western"><surname>Coleman</surname><given-names>CI</given-names> </name><name name-style="western"><surname>Han</surname><given-names>M</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ingham</surname><given-names>M</given-names> </name></person-group><article-title>Effectiveness, treatment durability, and treatment costs of canagliflozin and glucagon-like peptide-1 receptor agonists in patients with type 2 diabetes in the USA</article-title><source>BMJ Open Diabetes Res Care</source><year>2019</year><volume>7</volume><issue>1</issue><fpage>e000704</fpage><pub-id pub-id-type="doi">10.1136/bmjdrc-2019-000704</pub-id><pub-id pub-id-type="medline">31798890</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Montvida</surname><given-names>O</given-names> </name><name name-style="western"><surname>Shaw</surname><given-names>J</given-names> </name><name name-style="western"><surname>Atherton</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Stringer</surname><given-names>F</given-names> </name><name name-style="western"><surname>Paul</surname><given-names>SK</given-names> </name></person-group><article-title>Long-term trends in antidiabetes drug usage in the U.S.: real-world evidence in patients newly diagnosed with type 2 diabetes</article-title><source>Diabetes Care</source><year>2018</year><month>01</month><volume>41</volume><issue>1</issue><fpage>69</fpage><lpage>78</lpage><pub-id pub-id-type="doi">10.2337/dc17-1414</pub-id><pub-id pub-id-type="medline">29109299</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alsahaf</surname><given-names>A</given-names> </name><name name-style="western"><surname>Petkov</surname><given-names>N</given-names> </name><name name-style="western"><surname>Shenoy</surname><given-names>V</given-names> </name><name name-style="western"><surname>Azzopardi</surname><given-names>G</given-names> </name></person-group><article-title>A framework for feature selection through boosting</article-title><source>Expert Syst Appl</source><year>2022</year><month>01</month><volume>187</volume><fpage>115895</fpage><pub-id pub-id-type="doi">10.1016/j.eswa.2021.115895</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Fitz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Romero</surname><given-names>P</given-names> </name></person-group><person-group person-group-type="editor"><name 
name-style="western"><surname>Rau</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wardrop</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zingales</surname><given-names>L</given-names> </name></person-group><article-title>Neural networks and deep learning: a paradigm shift in information processing, machine learning, and artificial intelligence</article-title><source>The Palgrave Handbook of Technological Finance</source><year>2021</year><publisher-name>Springer International Publishing</publisher-name><fpage>589</fpage><lpage>654</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-65117-6_22</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="web"><article-title>Code of federal regulations (annual edition)</article-title><source>govInfo</source><access-date>2026-04-16</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.govinfo.gov/app/collection/cfr">https://www.govinfo.gov/app/collection/cfr</ext-link></comment></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Charlson</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Pompei</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ales</surname><given-names>KL</given-names> </name><name name-style="western"><surname>MacKenzie</surname><given-names>CR</given-names> </name></person-group><article-title>A new method of classifying prognostic comorbidity in longitudinal studies: development and validation</article-title><source>J Chronic Dis</source><year>1987</year><volume>40</volume><issue>5</issue><fpage>373</fpage><lpage>383</lpage><pub-id pub-id-type="doi">10.1016/0021-9681(87)90171-8</pub-id><pub-id pub-id-type="medline">3558716</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Glasheen</surname><given-names>WP</given-names> </name><name name-style="western"><surname>Cordier</surname><given-names>T</given-names> </name><name name-style="western"><surname>Gumpina</surname><given-names>R</given-names> </name><name name-style="western"><surname>Haugh</surname><given-names>G</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>J</given-names> </name><name name-style="western"><surname>Renda</surname><given-names>A</given-names> </name></person-group><article-title>Charlson Comorbidity Index: ICD-9 update and ICD-10 translation</article-title><source>Am Health Drug Benefits</source><year>2019</year><volume>12</volume><issue>4</issue><fpage>188</fpage><lpage>197</lpage><pub-id pub-id-type="medline">31428236</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Von Korff</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Diabetes complications severity index and risk of mortality, hospitalization, and healthcare utilization</article-title><source>Am J Manag Care</source><year>2008</year><month>01</month><volume>14</volume><issue>1</issue><fpage>15</fpage><lpage>23</lpage><pub-id pub-id-type="medline">18197741</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Glasheen</surname><given-names>WP</given-names> </name><name name-style="western"><surname>Renda</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Dong</surname><given-names>Y</given-names> </name></person-group><article-title>Diabetes complications severity index (DCSI)-update and ICD-10 translation</article-title><source>J Diabetes Complications</source><year>2017</year><month>06</month><volume>31</volume><issue>6</issue><fpage>1007</fpage><lpage>1013</lpage><pub-id pub-id-type="doi">10.1016/j.jdiacomp.2017.02.018</pub-id><pub-id pub-id-type="medline">28416120</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peng</surname><given-names>CYJ</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Ingersoll</surname><given-names>GM</given-names> </name></person-group><article-title>An introduction to logistic regression analysis and reporting</article-title><source>J Educ Res</source><year>2002</year><month>09</month><volume>96</volume><issue>1</issue><fpage>3</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1080/00220670209598786</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bishop</surname><given-names>CM</given-names> </name></person-group><source>Neural Networks for Pattern Recognition</source><year>1995</year><access-date>2026-04-04</access-date><publisher-name>Oxford University Press</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://academic.oup.com/book/52815">https://academic.oup.com/book/52815</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>XGBoost: a scalable tree boosting system</article-title><access-date>2026-04-29</access-date><conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name><conf-date>Aug 13-17, 2016</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.kdd.org/kdd2016/papers/files/rfp0697-chenAemb.pdf">https://www.kdd.org/kdd2016/papers/files/rfp0697-chenAemb.pdf</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Vaswani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shazeer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Parmar</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Attention is all you need</article-title><access-date>2026-04-29</access-date><conf-name>Adv Neural Inf Process Syst (NIPS 2017)</conf-name><conf-date>Dec 4-9, 2017</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf">https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Ting</surname><given-names>KM</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Sammut</surname><given-names>C</given-names> </name><name name-style="western"><surname>Webb</surname><given-names>GI</given-names> </name></person-group><article-title>Confusion matrix</article-title><source>Encyclopedia of Machine 
Learning</source><year>2010</year><publisher-name>Springer</publisher-name><fpage>209</fpage><pub-id pub-id-type="doi">10.1007/978-0-387-30164-8_157</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Manning</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Raghavan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sch&#x00FC;tze</surname><given-names>H</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Manning</surname><given-names>CD</given-names> </name><name name-style="western"><surname>Raghavan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sch&#x00FC;tze</surname><given-names>H</given-names> </name></person-group><article-title>Chapter 8. Evaluation in information retrieval</article-title><source>Introduction to Information Retrieval</source><year>2008</year><access-date>2026-04-04</access-date><publisher-name>Cambridge University Press</publisher-name><fpage>139</fpage><lpage>161</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.cambridge.org/highereducation/books/introduction-to-information-retrieval/669D108D20F556C5C30957D63B5AB65C#overview">https://www.cambridge.org/highereducation/books/introduction-to-information-retrieval/669D108D20F556C5C30957D63B5AB65C#overview</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fawcett</surname><given-names>T</given-names> </name></person-group><article-title>An introduction to ROC analysis</article-title><source>Pattern Recognit Lett</source><year>2006</year><month>06</month><volume>27</volume><issue>8</issue><fpage>861</fpage><lpage>874</lpage><pub-id 
pub-id-type="doi">10.1016/j.patrec.2005.10.010</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Groenwold</surname><given-names>RHH</given-names> </name></person-group><article-title>Informative missingness in electronic health record systems: the curse of knowing</article-title><source>Diagn Progn Res</source><year>2020</year><volume>4</volume><issue>1</issue><fpage>8</fpage><pub-id pub-id-type="doi">10.1186/s41512-020-00077-0</pub-id><pub-id pub-id-type="medline">32699824</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sterne</surname><given-names>JAC</given-names> </name><name name-style="western"><surname>White</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Carlin</surname><given-names>JB</given-names> </name><etal/></person-group><article-title>Multiple imputation for missing data in epidemiological and clinical research: potential and pitfalls</article-title><source>BMJ</source><year>2009</year><month>06</month><day>29</day><volume>338</volume><issue>jun29 1</issue><fpage>b2393</fpage><pub-id pub-id-type="doi">10.1136/bmj.b2393</pub-id><pub-id pub-id-type="medline">19564179</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Rochette</surname><given-names>T</given-names> </name></person-group><article-title>Ian Goodfellow-deep learning-2016</article-title><source>Tom Rochette</source><access-date>2025-09-04</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://blog.tomrochette.com/agi/books/ian-goodfellow-deep-learning#11-4-selecting-hyperparameters">https://blog.tomrochette.com/agi/books/ian-goodfellow-deep-learning#11-4-selecting-hyperparameters</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Lundberg</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name></person-group><article-title>A unified approach to interpreting model predictions</article-title><conf-name>Advances in neural information processing systems</conf-name><conf-date>Dec 4-9, 2017</conf-date><pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mandrekar</surname><given-names>JN</given-names> </name></person-group><article-title>Receiver operating characteristic curve in diagnostic test assessment</article-title><source>J Thorac Oncol</source><year>2010</year><month>09</month><volume>5</volume><issue>9</issue><fpage>1315</fpage><lpage>1316</lpage><pub-id pub-id-type="doi">10.1097/JTO.0b013e3181ec173d</pub-id><pub-id pub-id-type="medline">20736804</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x00C7;orbac&#x0131;o&#x011F;lu</surname><given-names>&#x015E;K</given-names> </name><name name-style="western"><surname>Aksel</surname><given-names>G</given-names> </name></person-group><article-title>Receiver operating characteristic curve analysis in diagnostic accuracy studies: a guide to interpreting the area under the curve value</article-title><source>Turk J Emerg 
Med</source><year>2023</year><volume>23</volume><issue>4</issue><fpage>195</fpage><lpage>198</lpage><pub-id pub-id-type="doi">10.4103/tjem.tjem_182_23</pub-id><pub-id pub-id-type="medline">38024184</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kanyongo</surname><given-names>W</given-names> </name><name name-style="western"><surname>Ezugwu</surname><given-names>AE</given-names> </name></person-group><article-title>Feature selection and importance of predictors of non-communicable diseases medication adherence from machine learning research perspectives</article-title><source>Inform Med Unlocked</source><year>2023</year><volume>38</volume><fpage>101232</fpage><pub-id pub-id-type="doi">10.1016/j.imu.2023.101232</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>XW</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>HB</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Long</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Tong</surname><given-names>RS</given-names> </name></person-group><article-title>Predictive models of medication non-adherence risks of patients with T2D based on multiple machine learning algorithms</article-title><source>BMJ Open Diabetes Res Care</source><year>2020</year><month>03</month><volume>8</volume><issue>1</issue><fpage>e001055</fpage><pub-id pub-id-type="doi">10.1136/bmjdrc-2019-001055</pub-id><pub-id pub-id-type="medline">32156739</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Lo-Ciganic</surname><given-names>WH</given-names> </name><name name-style="western"><surname>Donohue</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Thorpe</surname><given-names>JM</given-names> </name><etal/></person-group><article-title>Using machine learning to examine medication adherence thresholds and risk of hospitalization</article-title><source>Med Care</source><year>2015</year><month>08</month><volume>53</volume><issue>8</issue><fpage>720</fpage><lpage>728</lpage><pub-id pub-id-type="doi">10.1097/MLR.0000000000000394</pub-id><pub-id pub-id-type="medline">26147866</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hofer</surname><given-names>IS</given-names> </name><name name-style="western"><surname>Burns</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kendale</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wanderer</surname><given-names>JP</given-names> </name></person-group><article-title>Realistically integrating machine learning into clinical practice: a road map of opportunities, challenges, and a potential future</article-title><source>Anesth Analg</source><year>2020</year><month>05</month><volume>130</volume><issue>5</issue><fpage>1115</fpage><lpage>1118</lpage><pub-id pub-id-type="doi">10.1213/ANE.0000000000004575</pub-id><pub-id pub-id-type="medline">32287118</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scott</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Cook</surname><given-names>D</given-names> </name><name name-style="western"><surname>Coiera</surname><given-names>EW</given-names> </name><name 
name-style="western"><surname>Richards</surname><given-names>B</given-names> </name></person-group><article-title>Machine learning in clinical practice: prospects and pitfalls</article-title><source>Med J Aust</source><year>2019</year><month>09</month><volume>211</volume><issue>5</issue><fpage>203</fpage><lpage>205.e1</lpage><pub-id pub-id-type="doi">10.5694/mja2.50294</pub-id><pub-id pub-id-type="medline">31389031</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Li</surname><given-names>K</given-names> </name><name name-style="western"><surname>Herrero</surname><given-names>P</given-names> </name><name name-style="western"><surname>Georgiou</surname><given-names>P</given-names> </name></person-group><article-title>Deep learning for diabetes: a systematic review</article-title><source>IEEE J Biomed Health Inform</source><year>2021</year><month>07</month><volume>25</volume><issue>7</issue><fpage>2744</fpage><lpage>2757</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2020.3040225</pub-id><pub-id pub-id-type="medline">33232247</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khalid</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>HS</given-names> </name></person-group><article-title>Recent trends in diabetes mellitus diagnosis: an in-depth review of artificial intelligence-based techniques</article-title><source>Diabetes Res Clin Pract</source><year>2025</year><month>06</month><volume>224</volume><fpage>112221</fpage><pub-id pub-id-type="doi">10.1016/j.diabres.2025.112221</pub-id><pub-id 
pub-id-type="medline">40328407</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kwak</surname><given-names>D</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>X</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>X</given-names> </name></person-group><article-title>Comparing machine learning and advanced methods with traditional methods to generate weights in inverse probability of treatment weighting: the INFORM study</article-title><source>Pragmat Obs Res</source><year>2024</year><volume>15</volume><fpage>173</fpage><lpage>183</lpage><pub-id pub-id-type="doi">10.2147/POR.S466505</pub-id><pub-id pub-id-type="medline">39386162</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplemental material.</p><media xlink:href="diabetes_v11i1e85372_app1.docx" xlink:title="DOCX File, 314 KB"/></supplementary-material></app-group></back></article>