##------------------------------------------------------------------------## ## Script for Logit and Probit Models ## ## John Fox ## ## York SPIDA 2010 ## ##------------------------------------------------------------------------## # Chilean plebiscite data library(car) ?Chile Chile$yes <- with(Chile, ifelse(vote == "Y", 1, ifelse(vote=="N", 0, NA))) table(Chile$vote) table(Chile$yes) Chile <- na.omit(Chile[,c("yes", "statusquo")]) with(Chile, plot(statusquo, jitter(yes, factor=1/4), xlab="Support for the Status Quo", ylab="Voting Intention")) abline(h=c(0,1), lty=2) mod.lin <- lm(yes ~ statusquo, data=Chile) summary(mod.lin) abline(mod.lin, lwd=2) # linear probability model by OLS mod <- glm(yes ~ statusquo, family=binomial, data=Chile) # logit model by ML summary(mod) sq <- with(Chile, seq(min(statusquo), max(statusquo), length=1000)) fit <- predict(mod, newdata=data.frame(statusquo=sq), type="response") lines(sq, fit, lwd=2) mod.lo <- loess(yes~statusquo, span=0.4, degree=1, data=Chile) fit.lo <- predict(mod.lo, newdata=data.frame(statusquo=sq)) lines(sq, fit.lo, lwd=2, lty=2) # logistic regression for SLID data SLID <- read.table( "http://socserv.socsci.mcmaster.ca/jfox/Books/Applied-Regression-2E/datasets/SLID-women.txt", header=TRUE) some(SLID) summary(SLID) SLID$region <- factor(SLID$region, # re-order levels levels=c("Atlantic", "Quebec", "Ontario", "Prairies", "BC")) mod.slid.1 <- glm(working ~ region + kids0004 + kids0509 + kids1014 + familyIncome + education, family=binomial, data=SLID) Anova(mod.slid.1) mod.slid.2 <- glm(working ~ region + kids0004 + kids0509 + familyIncome + education, family=binomial, data=SLID) summary(mod.slid.2) confint(mod.slid.2) # LR-based intervals, preferable confint.default(mod.slid.2) # Wald-based intervals # alternative probit model mod.slid.probit <- glm(working ~ region + kids0004 + kids0509 + familyIncome + education, family=binomial(link=probit), data=SLID) coef(mod.slid.probit)*pi/sqrt(3) coef(mod.slid.2) plot(fitted(mod.slid.2), fitted(mod.slid.probit)) abline(0, 1) logLik(mod.slid.2) logLik(mod.slid.probit) # Effect plots for the logit model library(effects) plot(allEffects(mod.slid.2), ask=FALSE) with(SLID, quantile(education, probs=c(.1, .9))) with(SLID, quantile(familyIncome, probs=c(.1, .9))) eff <- allEffects(mod.slid.2, xlevels=list(education=10:17, familyIncome=seq(11.6, 44.3, length=10))) plot(eff, ask=FALSE) # Polytomous (multinomial) logit model, BEPS data library(nnet) ?BEPS # from effects package some(BEPS) mod.beps <- multinom(vote ~ age + gender + economic.cond.national + economic.cond.household + Blair + Hague + Kennedy + Europe*political.knowledge, data=BEPS) summary(mod.beps, cor=FALSE) Anova(mod.beps) # effect plot for multinomial logit model: Europe by knowledge effect plot(effect("Europe*political.knowledge", mod.beps, xlevels=list(Europe=1:11, political.knowledge=0:3))) plot(effect("Europe*political.knowledge", mod.beps, xlevels=list(Europe=1:11, political.knowledge=0:3), given.values=c(gendermale=0.5)), style="stacked", colors=c("blue", "red", "orange"), rug=FALSE) # Proportional-odds model, WVS data library(MASS) ?WVS # from effects package some(WVS) mod.wvs.1 <- polr(poverty ~ country*(gender + religion + degree + age), data=WVS) summary(mod.wvs.1) Anova(mod.wvs.1) # approximation to LR test for proportional-odds assumption mod.wvs.2 <- polr(poverty ~ gender + country*(religion + degree + age), data=WVS) summary(mod.wvs.2) mod.wvs.2.mnlr <- multinom(poverty ~ gender + country*(religion + degree + age), data=WVS) (dev <- deviance(mod.wvs.2) - deviance(mod.wvs.2.mnlr)) # approx LR statistic (df <- length(coef(mod.wvs.2.mnlr)) - length(c(coef(mod.wvs.2), mod.wvs.2$zeta))) pchisq(dev, df, lower.tail=FALSE) AIC(mod.wvs.2) AIC(mod.wvs.2.mnlr) AIC(mod.wvs.2, k=log(nrow(WVS))) # BIC AIC(mod.wvs.2.mnlr, k=log(nrow(WVS))) # BIC # effect plot for proportional-odds model: Country by Age effect plot(effect("country*age", mod.wvs.2)) plot(effect("country*age", mod.wvs.2), style="stacked") plot(effect("country*age", mod.wvs.2, latent=TRUE))