### R code from vignette source 'RCAL-ATE-vig.Rnw' ################################################### ### code chunk number 1: read.data ################################################### library(RCAL) data(simu.data) ################################################### ### code chunk number 2: RCAL-ATE-vig.Rnw:90-91 ################################################### simu.data[1:10, 1:6] ################################################### ### code chunk number 3: RCAL-ATE-vig.Rnw:97-104 ################################################### n <- dim(simu.data)[1] p <- 100 # include the first 100 covariates due to CRAN time constraint y <- simu.data[,1] tr <- simu.data[,2] x <- simu.data[,2+1:p] x <- scale(x) ################################################### ### code chunk number 4: RCAL-ATE-vig.Rnw:114-119 ################################################### par(mfrow=c(3,2)) par(mar=c(4,4,2,2)) for (j in 1:6) { boxplot(x[,j] ~ tr, ylab=paste("covariate x", j, sep=""), xlab="treatment") } ################################################### ### code chunk number 5: RCAL-ATE-vig.Rnw:130-135 ################################################### par(mfrow=c(3,2)) par(mar=c(4,4,2,2)) for (j in 1:6) { plot(x[tr==1,j], y[tr==1], ylab="y", xlab=paste("covariate x", j, sep="")) } ################################################### ### code chunk number 6: RCAL-ATE-vig.Rnw:156-158 ################################################### mean(y[tr==1]) # point estimate sqrt(var(y[tr==1]) / sum(tr) ) # standard error ################################################### ### code chunk number 7: RCAL-ATE-vig.Rnw:175-195 ################################################### ## regularized calibrated estimation RNGversion('3.5.0') set.seed(0) #this affects random split of data in cross validation mn.cv.rcal <- mn.regu.cv(fold=5*c(1,1), nrho=(1+10)*c(1,1), rho.seq=NULL, y, tr, x, ploss="cal", yloss="gaus") unlist(mn.cv.rcal$est) sqrt(mn.cv.rcal$est $var) mn.cv.rcal$ps$sel.nz[1] fp.cv.rcal <- mn.cv.rcal$ps$sel.fit[,1] ## regularized maximum likelihood estimation set.seed(0) #this affects random split of data in cross validation mn.cv.rml <- mn.regu.cv(fold=5*c(1,1), nrho=(1+10)*c(1,1), rho.seq=NULL, y, tr, x, ploss="ml", yloss="gaus") unlist(mn.cv.rml$est) sqrt(mn.cv.rml$est $var) mn.cv.rml$ps$sel.nz[1] fp.cv.rml <- mn.cv.rml$ps$sel.fit[,1] ################################################### ### code chunk number 8: RCAL-ATE-vig.Rnw:202-232 (eval = FALSE) ################################################### ## ## regularized calibrated estimation ## set.seed(0) ## ps.cv.rcal <- ## glm.regu.cv(fold=5, nrho=1+10, y=tr, x=x, loss="cal") ## ps.cv.rcal$sel.nz[1] ## fp.cv.rcal <- ps.cv.rcal $sel.fit[,1] ## ## or.cv.rcal <- ## glm.regu.cv(fold=5, nrho=1+10, y=y[tr==1], x=x[tr==1,], ## iw=1/fp.cv.rcal[tr==1]-1, loss="gaus") ## fo.cv.rcal <- c( cbind(1,x)%*%or.cv.rcal$sel.bet[,1] ) ## ## mn.cv.rcal2 <- unlist(mn.aipw(y, tr, fp=fp.cv.rcal, fo=fo.cv.rcal)) ## mn.cv.rcal2 ## ## ## regularized maximum likelihood estimation ## set.seed(0) ## ## ps.cv.rml <- ## glm.regu.cv(fold=5, nrho=1+10, y=tr, x=x, loss="ml") ## ps.cv.rml$sel.nz[1] ## fp.cv.rml <- ps.cv.rml $sel.fit[,1] ## ## or.cv.rml <- ## glm.regu.cv(fold=5, nrho=1+10, y=y[tr==1], x=x[tr==1,], ## iw=NULL, loss="gaus") ## fo.cv.rml <- c( cbind(1,x)%*%or.cv.rml$sel.bet[,1] ) ## ## mn.cv.rml2 <- unlist(mn.aipw(y, tr, fp=fp.cv.rml, fo=fo.cv.rml)) ## mn.cv.rml2 ################################################### ### code chunk number 9: RCAL-ATE-vig.Rnw:253-279 ################################################### fp.raw <- rep(mean(tr), n) #constant propensity scores check.raw <- mn.ipw(x, tr, fp.raw) check.cv.rcal <- mn.ipw(x, tr, fp.cv.rcal) check.cv.rml <- mn.ipw(x, tr, fp.cv.rml) par(mfrow=c(2,2)) par(mar=c(4,4,2,2)) plot(check.raw$est, xlim=c(0,p), ylim=c(-.3,.3), xlab="", ylab="std diff", main="Raw") abline(h=0) plot(check.cv.rml$est, xlim=c(0,p), ylim=c(-.3,.3), xlab="", ylab="std diff", main="RML") abline(h=0) abline(h=max(abs(check.cv.rml$est)) *c(-1,1), lty=2) plot(check.cv.rcal$est, xlim=c(0,p), ylim=c(-.3,.3), xlab="", ylab="std diff", main="RCAL") abline(h=0) abline(h=max(abs(check.cv.rcal$est)) *c(-1,1), lty=2) plot(fp.cv.rml[tr==1], fp.cv.rcal[tr==1], xlim=c(0,1), ylim=c(0,1), xlab="RML", ylab="RCAL", main="fitted probabilities") abline(c(0,1)) ################################################### ### code chunk number 10: RCAL-ATE-vig.Rnw:286-305 ################################################### par(mfrow=c(2,2)) par(mar=c(4,4,2,2)) plot(check.raw$est, xlim=c(0,p), ylim=c(-.3,.3), xlab="", ylab="std diff", main="Raw") abline(h=0) plot(check.cv.rml$est, xlim=c(0,p), ylim=c(-.3,.3), xlab="", ylab="std diff", main="RML") abline(h=0) abline(h=max(abs(check.cv.rml$est)) *c(-1,1), lty=2) plot(check.cv.rcal$est, xlim=c(0,p), ylim=c(-.3,.3), xlab="", ylab="std diff", main="RCAL") abline(h=0) abline(h=max(abs(check.cv.rcal$est)) *c(-1,1), lty=2) plot(fp.cv.rml[tr==1], fp.cv.rcal[tr==1], xlim=c(0,1), ylim=c(0,1), xlab="RML", ylab="RCAL", main="fitted probabilities") abline(c(0,1)) ################################################### ### code chunk number 11: RCAL-ATE-vig.Rnw:320-347 ################################################### set.seed(0) ps.path.rcal <- glm.regu.path(nrho=1+10, rho.seq=NULL, y=tr, x=x, loss="cal") fp.path.rcal <- ps.path.rcal $fit.all[, !ps.path.rcal$non.conv] mdiff.path.rcal <- rep(NA, dim(fp.path.rcal)[2]) rvar.path.rcal <- rep(NA, dim(fp.path.rcal)[2]) for (j in 1:dim(fp.path.rcal)[2]) { check.path.rcal <- mn.ipw(x, tr, fp.path.rcal[,j]) mdiff.path.rcal[j] <- max(abs(check.path.rcal$est)) rvar.path.rcal[j] <- var(1/fp.path.rcal[tr==1,j])/mean(1/fp.path.rcal[tr==1,j])^2 } set.seed(0) ps.path.rml <- glm.regu.path(nrho=1+10, rho.seq=NULL, y=tr, x=x, loss="ml") fp.path.rml <- ps.path.rml $fit.all[, !ps.path.rml$non.conv] mdiff.path.rml <- rep(NA, dim(fp.path.rml)[2]) rvar.path.rml <- rep(NA, dim(fp.path.rml)[2]) for (j in 1:dim(fp.path.rml)[2]) { check.path.rml <- mn.ipw(x, tr, fp.path.rml[,j]) mdiff.path.rml[j] <- max(abs(check.path.rml$est)) rvar.path.rml[j] <- var(1/fp.path.rml[tr==1,j])/mean(1/fp.path.rml[tr==1,j])^2 } ################################################### ### code chunk number 12: RCAL-ATE-vig.Rnw:354-374 ################################################### par(mfrow=c(1,2)) par(mar=c(4,4,2,2)) plot(ps.path.rml$nz.all[!ps.path.rml$non.conv], mdiff.path.rml, xlim=c(0,p), ylim=c(0,.3), xlab="# nonzero", ylab="std diff") lines(ps.path.rml$nz.all[!ps.path.rml$non.conv], mdiff.path.rml, lty=3) points(ps.path.rcal$nz.all[!ps.path.rcal$non.conv], mdiff.path.rcal, pch=4) lines(ps.path.rcal$nz.all[!ps.path.rcal$non.conv], mdiff.path.rcal, lty=3) legend(120,.3, c("RML","RCAL"), pch=c(1,4), cex=.6) # plot(rvar.path.rml, mdiff.path.rml, xlim=c(0,1.5), ylim=c(0,.3), xlab="rel var", ylab="std diff") lines(rvar.path.rml, mdiff.path.rml, lty=3) points(rvar.path.rcal, mdiff.path.rcal, pch=4) lines(rvar.path.rcal, mdiff.path.rcal, lty=3) legend(1.0,.3, c("RML","RCAL"), pch=c(1,4), cex=.6) ################################################### ### code chunk number 13: RCAL-ATE-vig.Rnw:384-401 ################################################### ## regularized calibrated estimation set.seed(0) ate.cv.rcal <- ate.regu.cv(fold=5*c(1,1), nrho=(1+10)*c(1,1), rho.seq=NULL, y, tr, x, ploss="cal", yloss="gaus") matrix(unlist(ate.cv.rcal$est), ncol=2, byrow=T, dimnames=list(c("one", "ipw", "or", "est", "var", "ze", "diff.est", "diff.var", "diff.ze"), c("untreated", "treated"))) ## regularized maximum likelihood estimation set.seed(0) ate.cv.rml <- ate.regu.cv(fold=5*c(1,1), nrho=(1+10)*c(1,1), rho.seq=NULL, y, tr, x, ploss="ml", yloss="gaus") matrix(unlist(ate.cv.rml$est), ncol=2, byrow=T, dimnames=list(c("one", "ipw", "or", "est", "var", "ze", "diff.est", "diff.var", "diff.ze"), c("untreated", "treated")))