% Journal article: Ploner et al. (2020), Statistical Methods in Medical Research 29(12).
% Cleaned from a repository export: one field per line, unspaced page-range dash,
% journal title in its proper form, and no `series` field (it only repeated the
% journal title). Section headings inside the abstract are marked with colons.
@article{PlonerHessGrumetal.2020,
  author    = {Ploner, Tina and Hess, Steffen and Grum, Marcus and Drewe-Boss, Philipp and Walker, Jochen},
  title     = {Using gradient boosting with stability selection on health insurance claims data to identify disease trajectories in chronic obstructive pulmonary disease},
  journal   = {Statistical Methods in Medical Research},
  volume    = {29},
  number    = {12},
  pages     = {3684--3694},
  year      = {2020},
  publisher = {SAGE Publications},
  address   = {London},
  issn      = {0962-2802},
  doi       = {10.1177/0962280220938088},
  abstract  = {Objective: We propose a data-driven method to detect temporal patterns of disease progression in high-dimensional claims data based on gradient boosting with stability selection.
               Materials and methods: We identified patients with chronic obstructive pulmonary disease in a German health insurance claims database with 6.5 million individuals and divided them into a group of patients with the highest disease severity and a group of control patients with lower severity. We then used gradient boosting with stability selection to determine variables correlating with a chronic obstructive pulmonary disease diagnosis of highest severity and subsequently model the temporal progression of the disease using the selected variables.
               Results: We identified a network of 20 diagnoses (e.g. respiratory failure), medications (e.g. anticholinergic drugs) and procedures associated with a subsequent chronic obstructive pulmonary disease diagnosis of highest severity. Furthermore, the network successfully captured temporal patterns, such as disease progressions from lower to higher severity grades.
               Discussion: The temporal trajectories identified by our data-driven approach are compatible with existing knowledge about chronic obstructive pulmonary disease, showing that the method can reliably select relevant variables in a high-dimensional context.
               Conclusion: We provide a generalizable approach for the automatic detection of disease trajectories in claims data. This could help to diagnose diseases early, identify unknown risk factors and optimize treatment plans.},
  language  = {en},
}