##------------------------------------------------------------------------## ## Script for Soc 761: Survival Analysis ## ## John Fox ## ## Fall 2014 ## ##------------------------------------------------------------------------## # Life tables source("lifeTable.R") # for file lifeTable.R in the current directory Mortality <- read.table(file.choose(), header=TRUE) head(Mortality) (LifeFemale <- lifeTable(Mortality$q.female)) # Plotting q[x] and l[x] options(scipen=4) # suppress scientific notation plot(0:109, LifeFemale[,4], xlab="Age in years, x", ylab="Age-specific mortality rate, q[x]", type="l", log="y") # log vertical axis plot(0:109, LifeFemale[,1], xlab="Age in years, x", ylab="Number surviving, l[x]", type="l", log="y") # the Kaplan-Meier estimator Rossi <- read.table(file.choose(), header=TRUE) Rossi[1:10,1:12] # first 10 observations library(survival) S.Rossi <- survfit(Surv(week, arrest) ~ 1, data=Rossi) summary(S.Rossi) plot(S.Rossi) plot(S.Rossi, ylim=c(.7, 1.0), xlab="t", ylab="estimated S(t)") # comparing two survival curves S.Rossi.fin <- survfit(Surv(week, arrest) ~ fin, data=Rossi) summary(S.Rossi.fin) plot(S.Rossi.fin, col=c("red", "blue"), lty=1:2, ylim=c(.65, 1.0), xlab="t", ylab="estimated S(t)") legend("bottomleft", legend=c("no aid", "aid"), lty=1:2, col=c("red", "blue"), inset=0.02) # testing the difference between two survival curves survdiff(Surv(week, arrest) ~ fin, data=Rossi) # log-rank test # Cox proportional-hazards regression args(coxph) mod.allison <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar + paro + prio, data=Rossi) mod.allison summary(mod.allison) # time-dependent covariate Rossi[1, ] source("unfold.R") # for file unfold.R in the current directory system.time(Rossi.2 <- unfold(Rossi, time='week', # create "long form" of data event='arrest', cov=11:62, cov.names='emp')) head(Rossi.2, 50) # check equivalence of (time, censor) short and # (start, stop, event) long data formats coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar + paro + prio, data=Rossi) coxph(Surv(start, stop, arrest.time) ~ fin + age + race + wexp + mar + paro + prio, data=Rossi.2) # Note: can also use reshape(), but it's a bit less convenient mod.allison.2 <- coxph(Surv(start, stop, arrest.time) ~ fin + age + race + wexp + mar + paro + prio + emp, data=Rossi.2) summary(mod.allison.2) # lagged covariates Rossi.3 <- unfold(Rossi, 'week', 'arrest', 11:62, 'emp', lag=1) head(Rossi.3, 50) mod.allison.3 <- coxph(Surv(start, stop, arrest.time) ~ fin + age + race + wexp + mar + paro + prio + emp, data=Rossi.3) summary(mod.allison.3) # diagnostics # non-proportional hazards mod.allison <- coxph(Surv(week, arrest) ~ fin + age + race + wexp + mar + paro + prio, data=Rossi) mod.allison (allison.zph <- cox.zph(mod.allison)) windows() # for demonstration par(mfrow=c(3, 3)) plot(allison.zph) # specifying interactions with time mod.allison.4 <- coxph(Surv(start, stop, arrest.time) ~ fin + age + age:stop + wexp + wexp:stop + prio, data=Rossi.2) # note: no "main effect" of stop mod.allison.4 cox.zph(mod.allison.4) # fitting by strata hist(Rossi$age) Rossi$age.cat <- cut(Rossi$age, c(0, 20, 25, Inf)) addmargins(xtabs(~ age.cat + wexp, data=Rossi)) mod.allison.5 <- coxph(Surv(week, arrest) ~ fin + prio + strata(age.cat, wexp), data=Rossi) mod.allison.5 cox.zph(mod.allison.5) # detecting influential observations par(mfrow=c(2, 2)) dfbeta <- residuals(mod.allison.5, type="dfbeta") for (j in 1:2){ plot(dfbeta[,j], ylab=paste("dfbeta(",names(coef(mod.allison.5))[j], ")", sep="")) abline(h=0, lty=2) } dfbetas <- residuals(mod.allison.5, type="dfbetas") for (j in 1:2){ plot(dfbetas[,j], ylab=paste("dfbetas(",names(coef(mod.allison.5))[j], ")", sep="")) abline(h=0, lty=2) } # detecting nonlinearity par(mfrow=c(1, 2)) res <- residuals(mod.allison.5, type='martingale') plot(Rossi$prio, res, xlab="prio", ylab="Martingale residuals") abline(h=0, lty=2) lines(lowess(Rossi$prio, res, iter=0)) pres <- res + coef(mod.allison.5)[2]*Rossi$prio plot(Rossi$prio, pres, xlab="prio", ylab="Partial residuals") abline(lm(pres ~ Rossi$prio), lty=2) lines(lowess(Rossi$prio, pres, iter=0)) # estimating survival mod.allison.6 <- coxph(Surv(week, arrest) ~ fin + age + prio, data=Rossi) mod.allison.6 mean.age <- mean(Rossi$age) mean.age mean.prio <- mean(Rossi$prio) mean.prio pred <- data.frame(fin=c(0,1), age=rep(mean.age, 2), prio=rep(mean.prio, 2)) pred S <- survfit(mod.allison.6, newdata=pred) summary(S) par(mfrow=c(1, 1)) plot(S, lty=1:2, ylim=c(.7,1), ylab="Estimated S(t) at average age and prior arrests", xlab="Time t (weeks)", col=c("red", "blue")) legend("bottomleft", inset=0.02, legend=c("no aid", "aid"), lty=1:2, col=c("red", "blue"))