R Code for CovidWatch Analysis


#### Propensity score model ####
 
# Generate analytical dataset 
COHORT_Outpatient <- raw_data %>% filter(COHORT == "outpatient") %>% 
  select("MRN","ENC_ID","WATCH","N_OUT_OF_HOSP_DAYS","Result_collect_day","Enroll_Collect_day" , "Enroll_Result_day" ,
         "Month","AGE", "Age_Grp", "SEX", "Race_Grp", "Place_of_residence", "FINANCIAL_CLASS_Grp","PCP_internal_external_2", 
          # Comorbidities
         "HYPERTENSION_DX_YN", "DIABETES_DX_YN", "HYPERLIPIDEMIA_DX_YN", "CHF_DX_YN",  "AFIB_DX_YN",
         "CANCER_DX_YN",  'COPD_DX_YN', "ASTHMA_DX_YN",   "VTE_DX_YN",  "DEMENTIA_DX_YN",
         "CHRONIC_KIDNEY_DISEASE_DX_YN", "TRANSPLANT_DX_YN", "SUBSTANCE_ABUSE_DX_YN",
         "Encounter_type", "Place_of_residence","median_household_income",
        # Past utilization
        "PREVIOUS_ED_VISITS_PAST_YEAR", "PREVIOUS_OBSERVATION_VISITS_PAST_YEAR",
        "PREVIOUS_INPATIENT_ADMISSIONS_PAST_YEAR",  "PREVIOUS_OFFICE_VISITS_PAST_YEAR", 
        "DISCHARGE_TO_HOME_HEALTH_PAST_YEAR_YN"  )

# Specify the outcome variable 
OP_outcome <- COHORT_Outpatient$WATCH

# Create a dataframe to contain all explanatory variables 
OP_var <- subset(COHORT_Outpatient, select = c(-WATCH, -MRN))

# Convert categorical variables to dummy variables 
OP_var <- dummy_cols(OP_var,
                     remove_first_dummy = T,
                     remove_selected_columns =T)
OP_var <- as.data.frame(OP_var)


# Set a seed for reproducibility  

set.seed(2020)

 
# SuperLearner model fitting
SL_OP <- SuperLearner(Y = OP_outcome, X = OP_var, family = binomial(),
                      SL.library = c( "SL.glm"))
 

# Other wrappers tested  

# SL_OP <- SuperLearner(Y = OP_outcome, X = OP_var, family = binomial("logit"),
#                           SL.library = c( "SL.ranger","SL.glm","SL.randomForest" ,"SL.svm", "SL.caret","SL.bartMachine"))
# SL_OP <- SuperLearner(Y = OP_outcome, X = OP_var, family = binomial("logit"),
#                           SL.library = c( "SL.glm","SL.randomForest" ))
 

# Predicted probability
pred_OP <- predict(SL_OP, OP_var,type = "response", onlySL = TRUE)


# Add predicted probabilities back to the analytical dataset
COHORT_Outpatient$Predicted <- pred_OP$pred[, 1]

# Inverse propensity score weightng 
COHORT_Outpatient$PS_weight <- ifelse(COHORT_Outpatient$WATCH == 1, 1/COHORT_Outpatient$Predicted, 1/(1-COHORT_Outpatient$Predicted))