Chapter 2: R Code Sample dataset codebook: treat = Binary indicator of treatment versus control group x1-x5 = continuous confounders associated with Treat cont_out = Continuous outcome of interest bin_out = Binary outcome of interest MATCHING USING MATCHIT PACKAGE # Install MatchIt package prior to first use R> install.packages("MatchIt") # Call MatchIt library in every R session R> library(MatchIt) K:1 matching without replacement # 1:1 matching R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata) # 2:1 matching with caliper of 0.15 PS standard deviations R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest", ratio=2, caliper=0.15) # 2:1 matching, random sorting R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest", ratio=2, m.order="random") # 1:1 matching, Mahalanobis metric R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest", distance="mahalanobis") K:1 matching with replacement # 2:1 matching with replacement R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest", ratio=2, replace=TRUE) # 3:1 matching with replacement, linear PS metric, caliper R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest", ratio=3, distance="linear.logit", caliper=0.20, replace=TRUE) Combining nearest neighbor with Mahalanobis or exact matching # 1:1 Nearest neighbor matching, Mahalanobis matching on x1 and x2 within a caliper R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest", mahvars=c("x1","x2"), caliper=0.25) # Exact match on x1, x2; nearest neighbor matching on x3, x4, x5 R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest", exact=c("x1","x2")) Estimating treatment effect after matching # Obtain matched dataset from MatchIt output R> m.mydata <- match.data(m.out) # Matched without replacement: conduct t-test R> t.test(m.mydata$cont_out[m.mydata$treat==1], m.mydata$cont_out[m.mydata$treat==0]) # Alternatively, regression model R> lm(cont_out~treat, 
data=m.mydata) # Regression model including covariates R> glm(bin_out~treat+x1+x2+x3+x4+x5, data=m.mydata, family=binomial) # Matched with replacement, use frequency weights R> glm(bin_out~treat+x1+x2+x3+x4+x5, data=m.mydata, weights=weights, family=binomial) Coarsened exact matching using CEM package # Install CEM package R> install.packages('cem') R> library('cem') # CEM with automatic binning R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="cem") # Obtain matched dataset from selected MatchIt output R> m.mydata <- match.data(m.out) # Estimate treatment effects; CEM weight is frequency weight R> lm(cont_out~treat+x1+x2+x3+x4+x5, data=m.mydata, weights=weights) Optimal matching using Optmatch package # Install Optmatch package R> install.packages('optmatch') R> library('optmatch') # Optimal matching with a 2:1 ratio R> m.out<-matchit(treat~x1+x2+x3+x4+x5,data=mydata,method="optimal", ratio=2) # Optimal matching with a variable ratio R> m.out<-matchit(treat~x1+x2+x3+x4+x5,data=mydata,method="optimal") # Obtain matched dataset from selected MatchIt output R> m.mydata <- match.data(m.out) # Use frequency weights when variable-ratio R> lm(cont_out~treat+x1+x2+x3+x4+x5, data=m.mydata, weights=weights) Full matching R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="full") # Constrained full matching: 2-5 controls per treated individual R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="full", min.controls=2, max.controls=5) # Obtain matched dataset from selected MatchIt output R> matched.mydata <- match.data(m.out) # Outcome regression, use frequency weights R> lm(cont_out~treat+x1+x2+x3+x4+x5, data=matched.mydata, weights=weights) Balance diagnostics in MatchIt # 1:1 matching R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata) # Print balance table R> summary(m.out, standardize=TRUE) # Plot of absolute SMD for all covariates, before and after matching R> plot(summary(m.out, standardize=TRUE), interactive=FALSE) # Jitter plot of 
propensity scores R> plot(m.out, type = "jitter", interactive = FALSE) # Propensity score histogram plot R> plot(m.out, type = "hist") PROPENSITY SCORE WEIGHTING, PARAMETRIC PS ESTIMATION # Install survey package prior to first use R> install.packages("survey") # Call survey library in every R session R> library(survey) # Estimate the propensity score with logistic regression R> ps.logit <- glm(treat~x1+x2+x3+x4+x5, data=mydata, family=binomial) R> mydata$pscore <- predict(ps.logit, type="response") # Calculate ATE propensity score weights (IPTW) R> mydata$w.ate<-ifelse(mydata$treat==1,1/mydata$pscore,1/(1-mydata$pscore)) # Use ATE weights as probability weights in final analysis R> design.ate <- svydesign(ids=~1,weights=~w.ate,data=mydata) R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.ate) # Calculate ATT propensity score weights R> mydata$w.att<-ifelse(mydata$treat==1,1,mydata$pscore/(1-mydata$pscore)) # Use ATT weights as probability weights in final analysis R> design.att <- svydesign(ids=~1,weights=~w.att,data=mydata) R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.att) PROPENSITY SCORE WEIGHTING, GBM PS ESTIMATION USING TWANG PACKAGE # Install twang package prior to first use R> install.packages("twang") # Call twang library in every R session R> library(twang) # Install survey package prior to first use R> install.packages("survey") # Call survey library in every R session R> library(survey) # Set random seed in order to ensure duplication of results R> set.seed(1234) # Run propensity score model, calculating ATT weights R> mydata_ps1 <- ps(treat~x1+x2+x3+x4+x5, data=mydata, estimand="ATT", verbose=FALSE) # Save ATT weight as variable to original dataset R> mydata$w.att <- get.weights(mydata_ps1,stop.method="ks.mean") # Use ATT weights as probability weights in final analysis R> design.att <- svydesign(ids=~1,weights=~w.att,data=mydata) R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.att) # Run propensity score model, calculating ATE 
weights R> mydata_ps2 <- ps(treat~x1+x2+x3+x4+x5, data=mydata, estimand="ATE", verbose=FALSE) # Save ATE weight as variable to original dataset R> mydata$w.ate <- get.weights(mydata_ps2,stop.method="ks.mean") # Use ATE weights as probability weights in final analysis R> design.ate <- svydesign(ids=~1,weights=~w.ate,data=mydata) R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.ate) Balance diagnostics in Twang # Run propensity score model, calculating ATT weights R> mydata_ps <- ps(treat~x1+x2+x3+x4+x5, data=mydata, estimand="ATT", verbose=FALSE) # Print balance table R> bal.table(mydata_ps) # Provides quick summary of balance, effective sample size R> summary(mydata_ps) # Boxplot of Propensity Scores R> plot(mydata_ps, plots="boxplot") # Effect Size plots R> plot(mydata_ps, plots="es") # Check distribution of weights within each treatment group R> summary(mydata$w.att[mydata$treat==1]) R> summary(mydata$w.att[mydata$treat==0]) SUBCLASSIFICATION USING MATCHIT PACKAGE # Install MatchIt package prior to first use R> install.packages("MatchIt") # Call MatchIt library in every R session R> library(MatchIt) Generating subclasses based on propensity score # Subclassification with 5 subclasses R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="subclass", subclass=5) Calculating subclass-specific effects # Get matched data, subclass indicator variable is "subclass" R> data.sub <- match.data(m.out) # Subclass-specific t-test for given subclass R> t.test(cont_out~treat, data=data.sub, subset=subclass==1) # Equivalent to subclass-specific t-test for all subclasses R> lm(cont_out~as.factor(I(subclass)) + as.factor(I(subclass*treat)) -1, data=data.sub) ATE estimate (combining subclass-specific estimate) # define N = total number of people R> N <- dim(data.sub)[1] # Initialize vectors for subclass-specific effects ("sub.effect"), variances("sub.var"), and sample size("sub.N") R> sub.effect <- rep(NA, max(data.sub$subclass)) R> sub.var <- rep(NA, 
max(data.sub$subclass)) R> sub.N <- rep(NA, max(data.sub$subclass)) # Run linear regression model within each subclass R> for(s in 1:max(data.sub$subclass)){ tmp <- lm(cont_out ~ treat, data=data.sub, subset=subclass==s) sub.effect[s] <- tmp$coef[2] sub.var[s] <- summary(tmp)$coef[2,2]^2 sub.N[s] <- sum(data.sub$subclass==s) } # Calculate overall ATE effect R> ATE.effect <- sum((sub.N/N)*sub.effect) R> ATE.stderror <- sqrt(sum((sub.N/N)^2*sub.var))