R - Duke

advertisement
Chapter 2: R Code
Sample dataset codebook:
treat = Binary indicator of treatment versus control group
x1-x5 = continuous confounders associated with treat
cont_out = Continuous outcome of interest
bin_out = Binary outcome of interest
MATCHING USING MATCHIT PACKAGE
# Install MatchIt package prior to first use
R> install.packages("MatchIt")
# Call MatchIt library in every R session
R> library(MatchIt)
K:1 matching without replacement
# 1:1 matching
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata)
# 2:1 matching with caliper of 0.15 PS standard deviations
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method=
"nearest", ratio=2, caliper=0.15)
# 2:1 matching, random sorting
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest",
ratio=2, m.order="random")
# 1:1 matching, Mahalanobis metric
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest",
distance="mahalanobis")
K:1 matching with replacement
# 2:1 matching with replacement
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest",
ratio=2, replace=TRUE)
# 3:1 matching with replacement, linear PS metric, caliper
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest",
ratio=3, distance="linear.logit", caliper=0.20, replace=TRUE)
Combining nearest neighbor with Mahalanobis or exact matching
# 1:1 Nearest neighbor matching, Mahalanobis matching on x1 and x2 within a
caliper
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest",
mahvars=c("x1","x2"), caliper=0.25)
# Exact match on x1, x2; nearest neighbor matching on x3, x4, x5
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="nearest",
exact=c("x1","x2"))
Estimating treatment effect after matching
# Obtain matched dataset from MatchIt output
R> m.mydata <- match.data(m.out)
# Matched without replacement: conduct t-test
R> t.test(m.mydata$cont_out[m.mydata$treat==1],
m.mydata$cont_out[m.mydata$treat==0])
# Alternatively, regression model
R> lm(cont_out~treat, data=m.mydata)
# Regression model including covariates
R> glm(bin_out~treat+x1+x2+x3+x4+x5, data=m.mydata, family=binomial)
# Matched with replacement, use frequency weights
R> glm(bin_out~treat+x1+x2+x3+x4+x5, data=m.mydata, weights=weights,
family=binomial)
Coarsened exact matching using CEM package
# Install CEM package
R> install.packages('cem')
R> library('cem')
# CEM with automatic binning
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="cem")
# Obtain matched dataset from selected MatchIt output
R> m.mydata <- match.data(m.out)
# Estimate treatment effects; CEM weight is frequency weight
R> lm(cont_out~treat+x1+x2+x3+x4+x5, data=m.mydata, weights=weights)
Optimal matching using Optmatch package
# Install Optmatch package
R> install.packages('optmatch')
R> library('optmatch')
# Optimal matching with a 2:1 ratio
R> m.out<-matchit(treat~x1+x2+x3+x4+x5,data=mydata,method="optimal",
ratio=2)
# Optimal matching with a variable ratio
R> m.out<-matchit(treat~x1+x2+x3+x4+x5,data=mydata,method="optimal")
# Obtain matched dataset from selected MatchIt output
R> m.mydata <- match.data(m.out)
# Use frequency weights when variable-ratio
R> lm(cont_out~treat+x1+x2+x3+x4+x5, data=m.mydata, weights=weights)
Full matching
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="full")
# Constrained full matching: 2-5 controls per treated individual
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="full",
min.controls=2, max.controls=5)
# Obtain matched dataset from selected MatchIt output
R> matched.mydata <- match.data(m.out)
# Outcome regression, use frequency weights
R> lm(cont_out~treat+x1+x2+x3+x4+x5, data=matched.mydata, weights=weights)
Balance diagnostics in MatchIt
# 1:1 matching
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata)
# Print balance table
R> summary(m.out, standardize=TRUE)
# Plot of absolute SMD for all covariates, before and after matching
R> plot(summary(m.out, standardize=TRUE), interactive=FALSE)
# Jitter plot of propensity scores
R> plot(m.out, type = "jitter", interactive = FALSE)
# Propensity score histogram plot
R> plot(m.out, type = "hist")
PROPENSITY SCORE WEIGHTING, PARAMETRIC PS ESTIMATION
# Install survey package prior to first use
R> install.packages("survey")
# Call survey library in every R session
R> library(survey)
# Estimate the propensity score with logistic regression
R> ps.logit <- glm(treat~x1+x2+x3+x4+x5, data=mydata, family=binomial)
R> mydata$pscore <- predict(ps.logit, type="response")
# Calculate ATE propensity score weights (IPTW)
R> mydata$w.ate<-ifelse(mydata$treat==1,1/mydata$pscore,1/(1-mydata$pscore))
# Use ATE weights as probability weights in final analysis
R> design.ate <- svydesign(ids=~1,weights=~w.ate,data=mydata)
R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.ate)
# Calculate ATT propensity score weights
R> mydata$w.att<-ifelse(mydata$treat==1,1,mydata$pscore/(1-mydata$pscore))
# Use ATT weights as probability weights in final analysis
R> design.att <- svydesign(ids=~1,weights=~w.att,data=mydata)
R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.att)
PROPENSITY SCORE WEIGHTING, GBM PS ESTIMATION USING TWANG PACKAGE
# Install twang package prior to first use
R> install.packages("twang")
# Call twang library in every R session
R> library(twang)
# Install survey package prior to first use
R> install.packages("survey")
# Call survey library in every R session
R> library(survey)
# Set random seed in order to ensure duplication of results
R> set.seed(1234)
# Run propensity score model, calculating ATT weights
R> mydata_ps1 <- ps(treat~x1+x2+x3+x4+x5, data=mydata, estimand="ATT",
verbose=FALSE)
# Save ATT weight as variable to original dataset
R> mydata$w.att <- get.weights(mydata_ps1,stop.method="ks.mean")
# Use ATT weights as probability weights in final analysis
R> design.att <- svydesign(ids=~1,weights=~w.att,data=mydata)
R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.att)
# Run propensity score model, calculating ATE weights
R> mydata_ps2 <- ps(treat~x1+x2+x3+x4+x5, data=mydata, estimand="ATE",
verbose=FALSE)
# Save ATE weight as variable to original dataset
R> mydata$w.ate <- get.weights(mydata_ps2,stop.method="ks.mean")
# Use ATE weights as probability weights in final analysis
R> design.ate <- svydesign(ids=~1,weights=~w.ate,data=mydata)
R> svyglm(cont_out~treat+x1+x2+x3+x4+x5, design=design.ate)
Balance diagnostics in Twang
# Run propensity score model, calculating ATT weights
R> mydata_ps <- ps(treat~x1+x2+x3+x4+x5, data=mydata, estimand="ATT",
verbose=FALSE)
# Print balance table
R> bal.table(mydata_ps)
# Provides quick summary of balance, effective sample size
R> summary(mydata_ps)
# Boxplot of Propensity Scores
R> plot(mydata_ps, plots="boxplot")
# Effect Size plots
R> plot(mydata_ps, plots="es")
# Check distribution of weights within each treatment group
R> summary(mydata$w.att[mydata$treat==1])
R> summary(mydata$w.att[mydata$treat==0])
SUBCLASSIFICATION USING MATCHIT PACKAGE
# Install MatchIt package prior to first use
R> install.packages("MatchIt")
# Call MatchIt library in every R session
R> library(MatchIt)
Generating subclasses based on propensity score
# Subclassification with 5 subclasses
R> m.out <- matchit(treat~x1+x2+x3+x4+x5, data=mydata, method="subclass",
subclass=5)
Calculating subclass-specific effects
# Get matched data, subclass indicator variable is "subclass"
R> data.sub <- match.data(m.out)
# Subclass-specific t-test for given subclass
R> t.test(cont_out~treat, data=data.sub, subset=subclass==1)
# Equivalent to subclass-specific t-test for all subclasses
R> lm(cont_out~as.factor(I(subclass)) + as.factor(I(subclass*treat)) -1,
data=data.sub)
ATE estimate (combining subclass-specific estimate)
# define N = total number of people
R> N <- dim(data.sub)[1]
# Initialize vectors for subclass-specific effects
("sub.effect"), variances ("sub.var"), and sample sizes ("sub.N")
R> sub.effect <- rep(NA, max(data.sub$subclass))
R> sub.var <- rep(NA, max(data.sub$subclass))
R> sub.N <- rep(NA, max(data.sub$subclass))
# Run linear regression model within each subclass
R> for(s in 1:max(data.sub$subclass)){
tmp <- lm(cont_out ~ treat, data=data.sub, subset=subclass==s)
sub.effect[s] <- tmp$coef[2]
sub.var[s] <- summary(tmp)$coef[2,2]^2
sub.N[s] <- sum(data.sub$subclass==s) }
# Calculate overall ATE effect
R> ATE.effect <- sum((sub.N/N)*sub.effect)
R> ATE.stderror <- sqrt(sum((sub.N/N)^2*sub.var))
Download