##------------------------------------------------------------------------## ## Script for Chapter 1, An R and S-PLUS Companion to Applied Regression ## ## John Fox ## ## Sage Publications, 2002 ## ##------------------------------------------------------------------------## # preliminaries options(width=65) options(digits=5) # Basics # arithmetic, interacting with the interpreter 2+3 2-3 2*3 2/3 2^3 4^2-3*2 (4^2)-(3*2) 1-6+4 2^-3 -2--3 -2 - -3 # functions, obtaining help log(100) log(100, base=10) log(100, b=10) help(log) log(100,10) "+"(2,3) # vectors c(1,2,3,4) 1:4 4:1 -1:2 seq(1,4) seq(2, 8, by=2) seq(0, 1, by=.1) seq(0, 1, length=11) c(1,2,3,4)/2 c(1,2,3,4)/c(4,3,2,1) log(c(0.1,1,10,100), 10) c(1,2,3,4) + c(4,3) c(1,2,3,4) + c(4,3,2) # variables x <- c(1,2,3,4) x x/2 y <- sqrt(x) y x <- rnorm(100) x summary(x) x[21] x[11:20] x[-(11:100)] #comparison and logical operators 1 == 2 1 != 2 1 <= 2 1 < 2 1 > 2 1 >= 2 T & T T & F F & F T | T T | F F | F ! c(T, F) z <- x[1:10] z z < -0.5 z > 0.5 z < -0.5 | z>0.5 abs(z) > 0.5 z[abs(z) > 0.5] # user-defined functions mean(x) sum(x)/length(x) my.mean <- function(x) sum(x)/length(x) my.mean(x) my.mean(y) my.mean(1:100) x # cleaning up objects() last.warning remove(x, y, z) objects() # using traceback() letters my.mean(letters) traceback() # Duncan example # creating a data frame from data stored in a file Duncan<-read.table('D:/data/Duncan.txt', header=T) Duncan summary(Duncan) # attaching a data frame attach(Duncan) prestige # distributions and bivariate relationships hist(prestige) 2*sqrt(length(prestige)) hist(prestige, nclass=10) pairs(cbind(prestige,income,education), panel=function(x,y){ points(x,y) abline(lm(y~x), lty=2) lines(lowess(x,y)) }, diag.panel=function(x){ par(new=T) hist(x, main="", axes=F, nclass=12) } ) # Using S-PLUS version of pairs: pairs(cbind(prestige,income,education), panel=function(x,y){ points(x,y) abline(lm(y~x), lty=2) lines(lowess(x,y)) } ) scatmat <- function(..., nclass=NULL) { pairs(cbind(...), panel=function(x,y){ points(x,y) abline(lm(y~x), lty=2) lines(lowess(x,y)) }, diag.panel=function(x){ par(new=T) hist(x, main="", axes=F, nclass=nclass) } ) } scatmat(prestige,income,education, nclass=12) plot(income, education) identify(income, education, row.names(Duncan)) row.names(Duncan)[c(6,16,27)] # fitting a regression duncan.model <- lm(prestige ~ income + education) duncan.model summary(duncan.model) # regression diagnostics library(car) hist(rstudent(duncan.model), nclass=12) qq.plot(duncan.model, labels=row.names(Duncan), simulate=T) plot(hatvalues(duncan.model)) abline(h = c(2,3)*3/45) identify(1:45, hatvalues(duncan.model), row.names(Duncan)) plot(cookd(duncan.model)) abline(h = 4/(45-2-1)) identify(1:45, cookd(duncan.model), row.names(Duncan)) av.plots(duncan.model, labels=row.names(Duncan)) cr.plots(duncan.model) spread.level.plot(duncan.model) ncv.test(duncan.model) ncv.test(duncan.model, var.formula=~income+education) # refitting the model remove <- which.names(c('minister', 'conductor'), Duncan) remove duncan.model.2 <- update(duncan.model, subset=-remove) summary(duncan.model.2)