##-------------------------------------## ## Script for R Graphics ## ## John Fox ## ## Programming in R ## ## ICPSR Summer Program 2009 ## ##-------------------------------------## # Graphics Basics # points, lines, axes, frames plot(c(0,1), c(0,1), type="n", xlab="", ylab="") # coordinate system ?plot par("col") # graphical parameters par() ?par plot(1:25, pch=1:25, xlab="Symbol Number", ylab="") # symbols lines(1:25, type="h", lty="dashed") plot(26:1, xlab="letters", ylab="", pch=letters, axes=FALSE, frame=TRUE) plot(c(1,7), c(0,1), type="n", axes=FALSE, # lines xlab="Line Type (lty)", ylab="") box() axis(1, at=1:6) for (lty in 1:6) lines(c(lty, lty, lty + 1), c(0, 0.5, 1), lty=lty) # text par(mfrow=c(1,2)) # array of plots plot(c(0,1), c(0,1), axes=FALSE, type="n", xlab="", ylab="") box() text(x=c(.2, .5), y=c(.2, .7), c("example text", "another string")) title("(a)") plot(c(0,1), c(0,1), axes=FALSE, type="n", xlab="", ylab="") box() text(locator(3), c("one","two","three"), adj=0) # position by mouse, left justified title("(b)") locator() # returns mouse coordinates # arrows and line segments plot(c(1,5), c(0,1), axes=FALSE, type="n", xlab="", ylab="") arrows(x0=1:5, y0=rep(0.1, 5), x1=1:5, y1=seq(0.3, 0.9, len=5), code=3) title("(a) arrows") # nicer arrows: p.arrows() in the sfsmisc package plot(c(1,5), c(0,1), axes=FALSE, type="n", xlab="", ylab="") segments(x0=1:5, y0=rep(0.1, 5), x1=1:5, y1=seq(0.3, 0.9, len=5)) title("(b) segments") # polygons par(mfrow=c(1,1)) # restore single panel plot(c(0,1), c(0,1), type="n", xlab="", ylab="") polygon(c(.2,.8,.8), c(.2,.2,.8), col="red") polygon(c(.2,.2,.8), c(.2,.8,.8)) # legend plot(c(1,5), c(0,1), axes=FALSE, type="n", xlab="", ylab="", frame.plot=TRUE) legend(locator(1), legend=c("group A", "group B", "group C"), lty=1:3, pch=1:3, col=c("blue", "green", "red")) # colors pie(rep(1, length(palette())), col=palette()) palette() rainbow(10) gray(0:8/8) pie(rep(1,100), col=rainbow(100), labels=rep("",100)) pie(rep(1,100), col=gray(0:100/100), labels=rep("",100)) # for palettes based on psychophysical principles, see the colorspace package # Putting it together # diagrams of the standard normal density function # showing the area above 1.96 oldpar <- par(mar = c(5, 6, 4, 2) + 0.1) # leave room on the left oldpar # old parameter saved; par(oldpar) to restore z <- seq(-4, 4, length=1000) p <- dnorm(z) plot(z, p, type="l", lwd=2, main=expression("The Standard Normal Density Function" ~~ phi(z)), ylab=expression(phi(z) == frac(1, sqrt(2*pi)) * ~~ e^- ~~ frac(z^2, 2))) abline(h=0, col="gray") abline(v=0, col="gray") z0 <- z[z >= 1.96] # define region to fill z0 <- c(z0[1], z0) p0 <- p[z >= 1.96] p0 <- c(0, p0) polygon(z0, p0, col="gray") coords <- locator(2) # locate head and tail of arrow arrows(coords$x[1], coords$y[1], coords$x[2], coords$y[2], code=1, length=0.125) text(coords$x[2], coords$y[2], pos=3, # text above tail of arrow expression(integral(phi(z)*dz, 1.96, infinity) == .025)) # with lines at z = -3:3 par(oldpar) # restore graphics parameters plot(z, p, type="n", xlab="", ylab="", axes=FALSE, main=expression("The Standard Normal Density Function" ~~ phi(z))) axis(1, pos=0, at=-3:3) abline(h=0) axis(2, pos=0, at=.1*1:3) abline(v=0) lines(z, p, lwd=2) text(locator(2), c("z", expression(phi(z))), xpd=TRUE) for (z0 in -3:3) lines(c(z0, z0), c(0, dnorm(z0)), lty=2) # a four-panel display explaining kernel regression UN <- read.table("http://socserv.socsci.mcmaster.ca/jfox/Courses/R-programming/UnitedNations.txt") UN.2 <- na.omit(UN[, c("life.female", "gdp.capita")]) # valid data only head(UN.2) par(mfrow=c(2,2)) # 2 x 2 array of graphs gdp <- UN.2$gdp.capita life <- UN.2$life.female ord <- order(gdp) # sort data by gdp gdp <- gdp[ord] life <- life[ord] x0 <- gdp[120] # focal x = x_(120) dist <- abs(gdp - x0) # distance from focal x h <- sort(dist)[95] # bandwidth for span of .5 (where n = 190) pick <- dist <= h # observations within window plot(gdp, life, xlab="GDP per Capita", ylab="Female Expectation of Life", type="n", main="(a) Observations Within the Window\nspan = 0.5") points(gdp[pick], life[pick], col="red") points(gdp[!pick], life[!pick], col="gray60") abline(v=x0) # focal x abline(v=c(x0 - h, x0 + h), lty=2) # window text(locator(1), expression(x[(120)]), xpd=TRUE, col="red") plot(range(gdp), c(0,1), xlab="GDP per Capita", ylab="Tricube Kernel Weight", type="n", main="(b) Tricube Weights") abline(v=x0) abline(v=c(x0 - h, x0 + h), lty=2) # function to calculate tricube weights: tricube <- function(z) ifelse(abs(z) < 1, (1 - (abs(z))^3)^3, 0) x <- seq(min(gdp), max(gdp), length=1000) z <- (x - x0)/h lines(spline(x, tricube(z)), lwd=2, col="blue") points(gdp[pick], tricube(((gdp-x0)/h)[pick]), col="red") abline(h=c(0,1), col="gray") plot(gdp, life, xlab="GDP per Capita", ylab="Female Expectation of Life", type="n", main="(c) Weighted Average (Kernal Estimate)") points(gdp[pick], life[pick], col="red") points(gdp[!pick], life[!pick], col="gray60") abline(v=x0) abline(v=c(x0 - h, x0 + h), lty=2) yhat <- weighted.mean(life, tricube((gdp - x0)/h)) # kernel estimate lines(c(x0 - h, x0 + h), c(yhat, yhat), lwd=3, col="blue") plot(gdp, life, xlab="GDP per Capita", ylab="Female Expectation of Life", main="(d) Complete Kernel Estimate") yhat <- rep(0, length(gdp)) for (i in 1:length(gdp)){ # kernel estimate at each x x0 <- gdp[i] dist <- abs(gdp - x0) h <- sort(dist)[95] yhat[i] <- weighted.mean(life, tricube((gdp - x0)/h)) } lines(gdp, yhat, lwd=2, col="blue") # Trellis displays (implemented in lattice package; uses grid package) library(nlme) # for data library(lattice) # for Trellis graphics data(MathAchieve) head(MathAchieve) data(MathAchSchool) head(MathAchSchool) # data management Bryk <- MathAchieve[, c("School", "SES", "MathAch")] Sector <- MathAchSchool$Sector names(Sector) <- row.names(MathAchSchool) Bryk$Sector <- Sector[as.character(Bryk$School)] head(Bryk) # examine 20 Catholic and 20 public schools set.seed(12345) # for reproducibility attach(Bryk) cat <- sample(unique(School[Sector=="Catholic"]), 20) Cat.20 <- Bryk[School %in% cat,] pub <- sample(unique(School[Sector=="Public"]), 20) Pub.20 <- Bryk[School %in% pub,] trellis.device() # graphics device for trellis display xyplot(MathAch ~ SES | School, data=Cat.20, main="Catholic Schools", ylab="Math Achievement", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) trellis.device() pubplot <- xyplot(MathAch ~ SES | School, data=Pub.20, main="Public Schools", ylab="Math Achievement", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) class(pubplot) pubplot # "print" trellis object # more on positioning panels in a multi-panel plot # using par(mfrow=c(m, n)) windows(length=5, width=5) par(mfrow=c(2, 2)) X <- seq(0, 1, length=200) EY <- rev(1 - X^2) Y <- EY + 0.1*rnorm(200) plot(X, Y, axes=FALSE, frame=TRUE, main="(a) mononotone, simple", cex.main=1, xlab="", ylab="") lines(X, EY, lwd=3) text(locator(2), c("X", "Y"), xpd=TRUE) X <- seq(0.02, 0.99, length=200) EY <- log(X/(1 - X)) Y <- EY + 0.5*rnorm(200) plot (X, Y, axes=FALSE, frame=TRUE, main="(b) monotone, not simple", cex.main=1, xlab="", ylab="") lines(X, EY, lwd=3) text(locator(2), c("X", "Y"), xpd=TRUE) X <- seq(0.2, 1, length=200) EY <- (X - 0.5)^2 Y <- EY + 0.04*rnorm(200) plot(X, Y, axes=FALSE, frame=TRUE, main="(c) nonmonotone", cex.main=1, xlab="", ylab="") lines(X, EY, lwd=3) text(locator(2), c("X", "Y"), xpd=TRUE) # finer control using par(fig=c(x1, x2, y1, y2)) windows() par(fig=c(0, .5, .5, 1)) X <- seq(0, 1, length=200) EY <- rev(1 - X^2) Y <- EY + 0.1*rnorm(200) plot(X, Y, axes=FALSE, frame=TRUE, main="(a) monotone, simple", cex.main=1, xlab="", ylab="") lines(X, EY, lwd=3) text(locator(2), c("X", "Y"), xpd=TRUE) par(new=TRUE) # don't clear plotting device (act as if "new") par(fig=c(.5, 1, .5, 1)) X <- seq(0.02, 0.99, length=200) EY <- log(X/(1 - X)) Y <- EY + 0.5*rnorm(200) plot (X, Y, axes=FALSE, frame=TRUE, main="(b) monotone, not simple", cex.main=1, xlab="", ylab="") lines(X, EY, lwd=3) text(locator(2), c("X", "Y"), xpd=TRUE) par(new=TRUE) par(fig=c(.25, .75, 0, .5)) X <- seq(0.2, 1, length=200) EY <- (X - 0.5)^2 Y <- EY + 0.04*rnorm(200) plot(X, Y, axes=FALSE, frame=TRUE, main="(c) nonmonotone", cex.main=1, xlab="", ylab="") lines(X, EY, lwd=3) text(locator(2), c("X", "Y"), xpd=TRUE) # Other notable graphics packages: grid, ggplot2, rgl