##------------------------------------------------------------## ## Script for Session 7: R Graphics ## ## John Fox ## ## Introduction to the R Statistical Computing Environment ## ## ICPSR Summer Program ## ## 2010 ## ##------------------------------------------------------------## # Graphics Basics ?plot args(plot.default) # default plot method ?plot.default # points, lines, axes, frames plot(c(0, 1), c(0, 1), type="n", xlab="", ylab="") # coordinate system par("col") # graphical parameters names(par()) ?par plot(1:25, pch=1:25, xlab="Symbol Number", ylab="") # symbols lines(1:25, type="h", lty="dashed") plot(26:1, xlab="letters", ylab="", pch=letters, axes=FALSE, frame.plot=TRUE) plot(c(1, 7), c(0, 1), type="n", axes=FALSE, # lines xlab="Line Type (lty)", ylab="") box() # add frame axis(1, at=1:6) # x-axis for (lty in 1:6) lines(c(lty, lty, lty + 1), c(0, 0.5, 1), lty=lty) plot(c(0, 1), c(0, 1), type="n", xlab="", ylab="") abline(0, 1) # intercept and slope abline(c(1, -1), lty="dashed") # horizontal and vertical lines: abline(h=seq(0, 1, by=0.1), v=seq(0, 1, by=0.1), col="gray") # text par(mfrow=c(1, 2)) # array of plots plot(c(0, 1), c(0, 1), axes=FALSE, type="n", xlab="", ylab="", frame.plot=TRUE, main="(a)") text(x=c(0.2, 0.5), y=c(0.2, 0.7), c("example text", "another string")) plot(c(0, 1), c(0, 1), axes=FALSE, type="n", xlab="", ylab="", frame.plot=TRUE, main="(b)") text(locator(3), c("one","two","three")) locator() # returns mouse coordinates # arrows and line segments plot(c(1,5), c(0,1), axes=FALSE, type="n", xlab="", ylab="") arrows(x0=1:5, y0=rep(0.1, 5), x1=1:5, y1=seq(0.3, 0.9, len=5), code=3) title("(a) arrows") # nicer arrows: p.arrows() in the sfsmisc package plot(c(1,5), c(0,1), axes=FALSE, type="n", xlab="", ylab="") segments(x0=1:5, y0=rep(0.1, 5), x1=1:5, y1=seq(0.3, 0.9, len=5)) title("(b) segments") # polygons par(mfrow=c(1,1)) # restore single panel plot(c(0, 1), c(0, 1), type="n", xlab="", ylab="") polygon(c(0.2, 0.8, 0.8), c(0.2, 0.2, 0.8), col="red") polygon(c(0.2, 0.2, 0.8), c(0.2, 0.8, 0.8)) # legend plot(c(1,5), c(0,1), axes=FALSE, type="n", xlab="", ylab="", frame.plot=TRUE) legend(locator(1), legend=c("group A", "group B", "group C"), lty=1:3, pch=1:3, col=c("blue", "green", "red")) legend("bottomright", legend=c("group A", "group B", "group C"), title="Groups", lty=1:3, pch=1:3, col=c("blue", "green", "red"), bty="n") # curve curve(x*cos(25/x), 0.01, pi, n=1000) curve(sin, 0, 2*pi, ann=FALSE, axes=FALSE, lwd=2) axis(1, pos=0, at=c(0, pi/2, pi, 3*pi/2, 2*pi), labels=c(0, expression(pi/2), expression(pi), expression(3*pi/2), expression(2*pi))) axis(2, pos=0) curve(cos, add=TRUE, lty="dashed", lwd=2) legend(pi, 1, lty=1:2, lwd=2, legend=c("sine", "cosine"), bty="n") # colors pie(rep(1, length(palette())), col=palette()) palette() rainbow(10) gray(0:9/9) colors()[1:10] # first ten named colors pie(rep(1,100), col=rainbow(100), labels=rep("",100)) pie(rep(1,100), col=gray(0:100/100), labels=rep("",100)) # for palettes based on psychophysical principles, # see the colorspace package # Putting it together # explaining nearest-neighbor kernel regression library(car) # for data oldpar <- par(mfrow=c(2,2), las=1) # 2 x 2 array of graphs head(UN) nrow(UN) UN <- na.omit(UN) nrow(UN) gdp <- UN$gdp infant <- UN$infant.mortality ord <- order(gdp) # sort data by gdp gdp <- gdp[ord] infant <- infant[ord] x0 <- gdp[150] # focal x = x_(150) (for example) dist <- abs(gdp - x0) # distance from focal x h <- sort(dist)[96] # bandwidth for span of .5 (where n = 193) pick <- dist <= h # observations within window # upper-left panel plot(gdp, infant, xlab="GDP per Capita", ylab="Infant-Mortality Rate", type="n", main="(a) Observations Within the Window\nspan = 0.5") points(gdp[pick], infant[pick], col="black") points(gdp[!pick], infant[!pick], col=gray(0.75)) abline(v=x0) # focal x abline(v=c(x0 - h, x0 + h), lty=2) # window text(x0, par("usr")[4] + 10, expression(x[(150)]), xpd=TRUE) # above plotting region # upper-right panel plot(range(gdp), c(0,1), xlab="GDP per Capita", ylab="Tricube Kernel Weight", type="n", main="(b) Tricube Weights") abline(v=x0) abline(v=c(x0 - h, x0 + h), lty=2) # function to calculate tricube weights tricube <- function(x, x0, h) { z <- abs(x - x0)/h ifelse(z < 1, (1 - z^3)^3, 0) } tc <- function(x) tricube(x, x0, h) # to use with curve curve(tc, min(gdp), max(gdp), n=1000, lwd=2, add=TRUE) points(gdp[pick], tricube(gdp, x0, h)[pick], col="gray20", cex=1.5, pch=16) abline(h=c(0, 1), col="gray") # lower-left panel plot(gdp, infant, xlab="GDP per Capita", ylab="Infant-Mortality Rate", type="n", main="(c) Weighted Average (Kernal Estimate)") points(gdp[pick], infant[pick], col="black") points(gdp[!pick], infant[!pick], col=gray(0.75)) abline(v=x0) abline(v=c(x0 - h, x0 + h), lty=2) yhat <- weighted.mean(infant, w=tricube(gdp, x0, h)) # kernel estimate lines(c(x0 - h, x0 + h), c(yhat, yhat), lwd=3) # line at kernel estimate # lower-right panel plot(gdp, infant, xlab="GDP per Capita", ylab="Infant-Mortality Rate", main="(d) Complete Kernel Estimate") yhat <- numeric(length(gdp)) for (i in 1:length(gdp)){ # kernel estimate at each x x0 <- gdp[i] dist <- abs(gdp - x0) h <- sort(dist)[95] yhat[i] <- weighted.mean(infant, w=tricube(gdp, x0, h)) } lines(gdp, yhat, lwd=2) par(oldpar) # restore plotting parameters # more on positioning panels in a multi-panel plot # using par(mfrow=c(m, n)) par(mfrow=c(2, 2)) x <- seq(0, 1, length=200) Ey <- rev(1 - x^2) y <- Ey + 0.1*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(a) monotone, simple", cex.main=1, xlab="", ylab="") lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) x <- seq(0.02, 0.99, length=200) Ey <- log(x/(1 - x)) y <- Ey + 0.5*rnorm(200) plot (x, y, axes=FALSE, frame=TRUE, main="(b) monotone, not simple", cex.main=1, xlab="", ylab="") lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) x <- seq(0.2, 1, length=200) Ey <- (x - 0.5)^2 y <- Ey + 0.04*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(c) non-monotone, simple", cex.main=1, xlab="", ylab="") lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) # finer control using par(fig=c(x1, x2, y1, y2)) windows() # new graphics-device window; quartz() on Mac par(oma=c(0, 0, 1, 0), mar=c(2, 3, 3, 2)) # leave room in top outer margin par(fig=c(0, .5, .5, 1)) # top-left panel x <- seq(0, 1, length=200) Ey <- rev(1 - x^2) y <- Ey + 0.1*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(a) monotone, simple", cex.main=1, xlab="", ylab="", col="gray", cex=0.75) lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) par(fig=c(.5, 1, .5, 1)) # top-right panel par(new=TRUE) x <- seq(0.02, 0.99, length=200) Ey <- log(x/(1 - x)) y <- Ey + 0.5*rnorm(200) plot (x, y, axes=FALSE, frame=TRUE, main="(b) monotone, not simple", cex.main=1, xlab="", ylab="", col="gray", cex=0.75) lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) par(fig=c(.25, .75, 0, .5)) # bottom panel par(new=TRUE) x <- seq(0.2, 1, length=200) Ey <- (x - 0.5)^2 y <- Ey + 0.04*rnorm(200) plot(x, y, axes=FALSE, frame=TRUE, main="(c) non-monotone, simple", cex.main=1, xlab="", ylab="", col="gray", cex=0.75) lines(x, Ey, lwd=2) mtext("x", side=1, adj=1) mtext("y ", side=2, at=max(y), las=1) title("Nonlinear Relationships", outer=TRUE) # Graph of the standard-normal density showing the area above 1.96 oldpar <- par(mar = c(5, 6, 4, 2) + 0.1) # leave room on the left oldpar # old parameter saved z <- seq(-4, 4, length=1000) p <- dnorm(z) plot(z, p, type="l", lwd=2, main=expression("The Standard Normal Density Function" ~~ phi(z)), ylab=expression(phi(z) == frac(1, sqrt(2*pi)) * ~~ e^- ~~ frac(z^2, 2))) abline(h=0, col="gray") abline(v=0, col="gray") z0 <- z[z >= 1.96] # define region to fill z0 <- c(z0[1], z0) p0 <- p[z >= 1.96] p0 <- c(0, p0) polygon(z0, p0, col="gray") arrows(2.5, dnorm(2.5), 3, 0.3, code=1, length=0.125) text(3, 0.3, pos=3, # text above tail of arrow expression(integral(phi(z)*dz, 1.96, infinity) == .025)) par(oldpar) # restore margins # Trellis displays (implemented in lattice package; uses grid package) library(nlme) # for data library(lattice) # for Trellis graphics data(MathAchieve) head(MathAchieve) data(MathAchSchool) head(MathAchSchool) # data management Bryk <- MathAchieve[, c("School", "SES", "MathAch")] Sector <- MathAchSchool$Sector names(Sector) <- row.names(MathAchSchool) Bryk$Sector <- Sector[as.character(Bryk$School)] head(Bryk) # examine 20 Catholic and 20 public schools set.seed(101010) # for reproducibility attach(Bryk) cat <- sample(unique(School[Sector=="Catholic"]), 20) Cat.20 <- Bryk[School %in% cat,] # students in 20 Catholic schools pub <- sample(unique(School[Sector=="Public"]), 20) Pub.20 <- Bryk[School %in% pub,] # students in 20 public schools trellis.device() xyplot(MathAch ~ SES | School, data=Cat.20, main="Catholic Schools", ylab="Math Achievement", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } ) trellis.device() xyplot(MathAch ~ SES | School, data=Pub.20, main="Public Schools", ylab="Math Achievement", panel=function(x, y){ panel.xyplot(x, y) panel.loess(x, y, span=1) panel.lmline(x, y, lty=2) } )