##------------------------------------------------------------------------##
## Script for Chapter 8, An R and S-PLUS Companion to Applied Regression  ##
##    John Fox                                                            ##
##    Sage Publications, 2002                                             ##
##    (corrected 10 September 2008)                                       ##
##------------------------------------------------------------------------##

options(width=65)
options(digits=5)

# writing functions

influence.plot <- function(model, scale=10, col=c(1,2),
    labels=names(rstud), ...){
    hatval <- hatvalues(model)
    rstud <- rstudent(model)
    cook <- sqrt(as.vector(cookd(model)))
            # as.vector is needed for S3
    scale <- scale/max(cook)
    p <- length(coef(model))
    n <- length(rstud)
    cutoff <- sqrt(4/(n - p))
    plot(hatval, rstud, xlab='Hat-Values',
        ylab='Studentized Residuals', type='n', ...)
    abline(v=c(2, 3)*p/n, lty=2)
    abline(h=c(-2, 0, 2), lty=2)
    for (i in 1:n) 
        points(hatval[i], rstud[i], cex=scale*cook[i], 
            col=if (cook[i] > cutoff) col[2] else col[1])
    if (labels[1] != FALSE) identify(hatval, rstud, labels)
    }

library(car)
data(Duncan)
influence.plot(lm(prestige ~ income + education, data=Duncan),
    ylim=c(-3, 4), col=gray(c(0.5, 0)))


# Matrix operations

A <- matrix(c(1, 2, -4, 3, 5, 0), 2, 3)
B <- matrix(1:6, 2, 3)
C <- matrix(c(2, -2, 0, 1, -1, 1, 4 ,4, -4), 3, 3, byrow=T)
A; B; C

A + B
A - B
A + C
2*A
-A

A %*% C

a <- rep(1, 3)
b <- c(1, 5, 3)
C %*% a
a %*% C
a %*% b

outer(a, b)

t(B)

solve(C)
round(solve(C), 5)

library(MASS)
fractions(solve(C))

solve(C, b)

solve(C) %*% b

D <- matrix(c(1, 2, 3, 5, 7, 8), 3, 2, byrow=T)
D
solve(D, b)

solve(A, c(1,2))

data(Prestige)
attach(Prestige)
X <- cbind(1, as.matrix(Prestige[,1:3]))
y <- Prestige[,4]
X[1:5,]
y

Prestige[,4, drop=F]

solve(t(X) %*% X) %*% t(X) %*% y

solve(crossprod(X)) %*% crossprod(X, y)

lm(prestige ~ education + income + women)

R <- cor(cbind(education, income, women))
R

eigen(R)

det(R)

    ## for S3:
    det <- function(X) prod(eigen(X, only.values=T)$values)
    det(R)

diag(R)  # extract diagonal
diag(R) <- NA  # set diagonal
R
diag(1:3)  # make diagonal matrix
diag(3)  # order-3 identity matrix

    
# control structures
    
    # conditionals

abs.1 <- function(x) if (x < 0) -x else x
abs.1(-5)
abs.1(5)

abs.1(-3:3)  # the first element, -3, controls the result


abs.2 <- function(x) ifelse(x < 0, -x, x)
abs.2(-3:3)

sign.1 <- function(x) {
    if (x < 0) -1
        else if (x > 0) 1
            else 0
    }
sign.1(-5)

sign.2 <- function(x) {
    ifelse (x < 0, -1, 
        ifelse(x > 0, 1, 0))
    }    
sign.2(c(-5, 0, 10))

sign.3 <- function (x) {
    switch(which(x < 0, x == 0, x > 0),
        -1, 0, 1)
    }    
sign.2(c(-5, 0, 10))
    
    # loops

prod(1:5)
gamma(5 + 1)

fact.1 <- function (x){
    if (x <= 1) return(1)
    f <- 1
    for (i in 1:x) f <- f * i
    f
    }
fact.1(5)

fact.2 <- function (x){
    i <- f <- 1
    while (i <= x) {
        f <- f * i
        i <- i + 1
        }
    f
    }
fact.2(5)

fact.3 <- function(x){
    if ((!is.numeric(x)) || (x != floor(x))
         || (x < 0) || (length(x) > 1))
        stop('argument must be a non-negative integer')
    i <- f <- 1
    repeat {
        f <- f * i
        i <- i + 1
        if (i > x) break
        }
    f
    }
fact.3(5)
fact.3(1.5)

    # recursive functions

fact.4 <- function(x){
    if (x <= 1) 1
    else x * fact.4(x - 1)
    }
fact.4(5)

factorial <- fact.4
remove(fact.4)
factorial(5)  # tries to call the removed fact.4

fact.5 <- function(x){
    if (x <= 1) 1
    else x * Recall(x - 1)
    }
fact.5(5)
factorial <- fact.5
remove(fact.5)
factorial(5)    # still works with fact.5 removed

    # an extended example: logistic regression

lreg <- function(X, y, max.iter=10, tol=1E-6){
    # X is the model matrix
    # y is the response vector of 0s and 1s
    # max.iter is the maximum number of iterations
    # tol is a convergence criterion
    X <- cbind(1, X)  # add constant
    b <- b.last <- rep(0, ncol(X))  # initialize
    it <- 1  # iteration index
    while (it <= max.iter){
        p <- as.vector(1/(1 + exp(-X %*% b)))
        V <- diag(p * (1 - p))
        var.b <- solve(t(X) %*% V %*% X)
        b <- b + var.b %*% t(X) %*% (y - p)
        if (max(abs(b - b.last)/(abs(b.last) + 0.01*tol)) < tol) break
        b.last <- b
        it <- it + 1  # increment index
        }
    if (it > max.iter) warning('maximum iterations exceeded')
    list(coefficients=as.vector(b), var=var.b, iterations=it)
    }  
    
data(Mroz)
attach(Mroz)
Mroz[1:5,]

lfp <- recode(lfp, " 'yes'=1; 'no'=0 ", as.factor=F)
wc <- recode(wc, " 'yes'=1; 'no'=0 ", as.factor=F)
hc <- recode(hc, " 'yes'=1; 'no'=0 ", as.factor=F)
mod.mroz <- lreg(cbind(k5, k618, age, wc, hc, lwg, inc), lfp)

mod.mroz$coefficients
sqrt(diag(mod.mroz$var))


lreg.2 <- function(X, y, method='BFGS'){
    X <- cbind(1, X)
    negLogL <- function(b, X, y) {
        p <- as.vector(1/(1 + exp(-X %*% b)))
        - sum(y*log(p) + (1 - y)*log(1 - p))
        }
    grad <- function(b, X, y){
        p <- as.vector(1/(1 + exp(-X %*% b)))
        - apply(((y - p)*X), 2, sum)
            }
    result <- optim(rep(0, ncol(X)), negLogL, gr=grad,
        hessian=T, method=method, X=X, y=y)
    list(coefficients=result$par, var=solve(result$hessian), 
        deviance=2*result$value, converged=result$convergence == 0)
    }

mod.mroz.2 <- lreg.2(cbind(k5, k618, age, wc, hc, lwg, inc), lfp)
mod.mroz.2$coefficients
sqrt(diag(mod.mroz.2$var))
mod.mroz.2$converged

    ##S-PLUS alternative:
    
    lreg.2 <- function(X, y){
        X <- cbind(1, X)
        k <- ncol(X)
        tri <- outer(1:k, 1:k, "<=")  # triangle
        negLogL <- function(b, X, y) {
            p <- as.vector(1/(1 + exp(-X %*% b)))
            - sum(y*log(p) + (1 - y)*log(1 - p))
            }
        grad <- function(b, X, y, tri){
            p <- as.vector(1/(1 + exp(-X %*% b)))
            grad <- - colSums((y - p)*X)
            hess <- (t(X) %*% diag(p*(1 - p)) %*% X)[tri]
            list(gradient=grad, hessian=hess)
            }
        result <- nlminb(rep(0, ncol(X)), negLogL, gradient=grad, hessian=T,
            X=X, y=y, tri=tri)
        list(coefficients=result$par,
            deviance=2*result$objective, var=solve(result$hessian),
            gradient=result$grad.norm, message=result$message)
        }


# apply and its relatives

detach(Mroz)
data(DavisThin)
DavisThin[1:10,]

DavisThin$thin.drive <- apply(DavisThin, 1, sum)
DavisThin$thin.drive

apply(DavisThin, 2, mean)

DavisThin$thin.drive <- NULL
DavisThin[1,2] <- DavisThin[2,4] <- DavisThin[10,3] <- NA
DavisThin[1:10,]

apply(DavisThin, 1, sum)[1:10]

apply(DavisThin, 1, function(x) 7*mean(x, na.rm=T))[1:10]

DavisThin[1,2:5] <- NA
DavisThin[1:10,]
make.scale <- function(items){
    if(sum(is.na(items)) >= 4) NA
        else 7*mean(items, na.rm=T)
    }
apply(DavisThin, 1, make.scale)[1:10]

thin.list <- as.list(DavisThin)
thin.list

lapply(thin.list, mean, na.rm=T)

sapply(thin.list, mean, na.rm=T)

data(Moore)
attach(Moore)
Moore

tapply(conformity, 
    list(Status=partner.status, Authoritarianism=fcategory), mean)


# classes and object-oriented programming

mod.prestige <- lm(prestige ~ income + education + women, 
    data=Prestige)
attributes(mod.prestige)
class(mod.prestige)

print

mod.prestige
print(mod.prestige)
print.lm(mod.prestige)

mod.mroz <- glm(lfp~., family=binomial, data=Mroz)
class(mod.mroz)

lreg.3 <- function(X, y, predictors=colnames(X), max.iter=10,
        tol=1E-6, constant=T){
    if (!is.numeric(X) || !is.matrix(X)) 
        stop('X must be a numeric matrix')
    if (!is.numeric(y) || !all(y == 0 | y == 1)) 
        stop('y must contain only 0s and 1s')
    if (nrow(X) != length(y)) 
        stop('X and y contain different numbers of observations')
    if (constant) {
        X <- cbind(1, X)
        colnames(X)[1] <- 'Constant'
        }
    b <- b.last <- rep(0, ncol(X))
    it <- 1
    while (it <= max.iter){
        p <- as.vector(1/(1 + exp(-X %*% b)))
        V <- diag(p * (1 - p))
        var.b <- solve(t(X) %*% V %*% X)
        b <- b + var.b %*% t(X) %*% (y - p)
        if (max(abs(b - b.last)/(abs(b.last) + 0.01*tol)) < tol) break
        b.last <- b
        it <- it + 1
        }
        dev <- -2*sum(y*log(p) + (1 - y)*log(1 - p))
    if (it > max.iter) warning('maximum iterations exceeded')
    result <- list(coefficients=as.vector(b), var=var.b, 
        deviance=dev, converged= it <= max.iter,
        predictors=predictors)
    class(result) <- 'lreg'
    result
    }

mod.mroz.3 <- lreg.3(cbind(k5, k618, age, wc, hc, lwg, inc), lfp)
class(mod.mroz.3)
mod.mroz.3

print.lreg <- function(x) {
    coef <- x$coefficients
    names(coef) <- x$predictors
    print(coef)
    if (!x$converged) cat('\n *** lreg did not converge ***\n')
    invisible(x)
    }

summary.lreg <- function(object) {
    b <- object$coefficients
    se <- sqrt(diag(object$var))
    z <- b/se
    table <- cbind(b, se, z, 2*(1-pnorm(abs(z))))
    colnames(table) <- c('Coefficient', 'Std.Error', 'z', 'p')
    rownames(table) <- object$predictors
    print(table)
    cat('\nDeviance =', object$deviance,'\n')
    if (!object$converged) cat('\n Note: *** lreg did not converge ***\n')
    }
    
mod.mroz.3
summary(mod.mroz.3)


    # using the S4 object system

colnames <- function(x){
    x <- as.matrix(x)
    dimnames(x)[[2]]
    }
'colnames<-' <- function(x, names){
    x <- as.matrix(x)
    dimnames(x)[[2]] <- names
    x
    }

rownames <- function(x){
    x <- as.matrix(x)
    dimnames(x)[[1]]
    }
'rownames<-' <- function(x, names){
    x <- as.matrix(x)
    dimnames(x)[[1]] <- names
    x
    }

setClass('lreg', 
    representation(coefficients='numeric', var='matrix',
        iterations='numeric', deviance='numeric',
        predictors='character'))
        
lreg.4 <- function(X, y, predictors=colnames(X),
    constant=T, max.iter=10, tol=1E-6){
    if (!is.numeric(X) || !is.matrix(X)) 
        stop('X must be a numeric matrix')
    if (!is.numeric(y) || !all(y == 0 | y == 1)) 
        stop('y must contain only 0s and 1s')
    if (nrow(X) != length(y)) 
        stop('X and y contain different numbers of observations')
    if (constant) {
        X <- cbind(1, X)
        colnames(X)[1] <- 'Constant'
        }
    b <- b.last <- rep(0, ncol(X))
    it <- 1
    while (it <= max.iter){
        p <- as.vector(1/(1 + exp(-X %*% b)))
        V <- diag(p * (1 - p))
        var.b <- solve(t(X) %*% V %*% X)
        b <- b + var.b %*% t(X) %*% (y - p)
        if (max(abs(b - b.last)/(abs(b.last) + 0.01*tol)) < tol) break
        b.last <- b
        it <- it + 1
        }
    if (it > max.iter) warning('maximum iterations exceeded')
    result <- new('lreg', coefficients=as.vector(b), var=var.b, iterations=it,
        deviance=-2*sum(y*log(p) + (1 - y)*log(1 - p)),
        predictors=predictors)
    result
    }    

mod.mroz.4 <- lreg.4(cbind(k5, k618, age, wc, hc, lwg, inc), lfp)  
class(mod.mroz.4)
mod.mroz.4

show

setMethod('show', signature(object='lreg'),
    definition=function(object){
        coef <- object@coefficients
        names(coef) <- object@predictors
        print(coef)
        }
    )

mod.mroz.4

setMethod('summary', signature(object='lreg'),
    definition=function(object, ...){
        b <- object@coefficients
        se <- sqrt(diag(object@var))
        z <- b/se
        table <- cbind(b, se, z, 2*(1-pnorm(abs(z))))
        colnames(table) <- c('Coefficient', 'Std.Error', 'z', 'p')
        rownames(table) <- object@predictors
        print(table)
        cat('\nDeviance =', object@deviance,'\n')
        }
    )
summary(mod.mroz.4)