I propose the following enhancements and changes to plot.lm(), the most important of which is the addition of a Residuals vs Leverage plot.
(1) A residual versus leverage plot has been added, available by specifying which = 5, and not included as one of the default plots. Contours of Cook's distance are included, by default at values of 0.5 and 1.0. The labeled points, if any, are those with the largest Cook's distances. The parameter cook.levels can be changed as required, to control what contours appear.
(2) Remove the word "plot" from the captions for which=2, 3, 4. It is redundant.
(3) Now that the pos argument to text() is vectorized, use that in preference to an offset.
(4) For which!=4 or 5, by default use pos=4 on the left half of the panel, and pos=2 on the right half of the panel. This prevents labels from appearing outside the plot area, where they can overlap other graphical features. The parameter label.pos allows users to change this default.
The modified code that I propose is below. This, a modified .Rd file, and files from diff used with the April 20 development version, are in my directory
http://wwwmaths.anu.edu.au/~johnm/r/plot-lm/
I believe the Residual-Leverage plot is given in Krause & Olsen, whether with Cook's distance contours I do not recall. I do not have access to a copy of this book. Martin Maechler drew my attention to it in 2003, as superior to the Cook's distance plot. I have finally got around to coding it up!
John Maindonald.
"plot.lm" <-
function (x, which = 1:4, caption = c("Residuals vs Fitted",
"Normal Q-Q", "Scale-Location",
"Cook's distance", "Residuals vs Leverage"),
panel = points, sub.caption = deparse(x$call), main = "",
ask = prod(par("mfcol")) < length(which) && dev.interactive(),
..., id.n = 3, labels.id = names(residuals(x)), cex.id = 0.75,
cook.levels=c(0.5, 1.0), label.pos=c(4,2))
{
if (!inherits(x, "lm"))
stop("Use only with 'lm' objects")
if (!is.numeric(which) || any(which < 1) || any(which > 5))
stop("`which' must be in 1:5")
isGlm <- inherits(x, "glm")
show <- rep(FALSE, 5)
show[which] <- TRUE
r <- residuals(x)
yh <- predict(x)
w <- weights(x)
if (!is.null(w)) {
wind <- w != 0
r <- r[wind]
yh <- yh[wind]
w <- w[wind]
labels.id <- labels.id[wind]
}
n <- length(r)
if (any(show[2:5])) {
s <- if (inherits(x, "rlm"))
x$s
else sqrt(deviance(x)/df.residual(x))
hii <- lm.influence(x, do.coef = FALSE)$hat
if (any(show[4:5])) {
cook <- if (isGlm)cooks.distance(x)
else cooks.distance(x, sd = s, res = r)
}
}
if (any(show[c(2:3,5)])) {
ylab23 <- if (isGlm)
"Std. deviance resid."
else "Standardized residuals"
r.w <- if (is.null(w))
r
else sqrt(w) * r
rs <- r.w/(s * sqrt(1 - hii))
}
if (any(show[c(1, 3)]))
l.fit <- if (isGlm)
"Predicted values"
else "Fitted values"
if (is.null(id.n))
id.n <- 0
else {
id.n <- as.integer(id.n)
if (id.n < 0 || id.n > n)
stop("`id.n' must be in {1,..,", n, "}")
}
if (id.n > 0) {
if (is.null(labels.id))
labels.id <- paste(1:n)
iid <- 1:id.n
show.r <- sort.list(abs(r), decreasing = TRUE)[iid]
if (any(show[2:3]))
show.rs <- sort.list(abs(rs), decreasing = TRUE)[iid]
text.id <- function(x, y, ind, adj.x = FALSE){
midx <- mean(range(x))
labpos <- if (!adj.x) 3
else
label.pos[1+as.numeric(x>midx)]
text(x, y, labels.id[ind], cex = cex.id, xpd = TRUE, pos=labpos,
offset=0.25)
}
}
one.fig <- prod(par("mfcol")) == 1
if (ask) {
op <- par(ask = TRUE)
on.exit(par(op))
}
if (show[1]) {
ylim <- range(r, na.rm = TRUE)
if (id.n > 0)
ylim <- ylim + c(-1, 1) * 0.08 * diff(ylim)
plot(yh, r, xlab = l.fit, ylab = "Residuals", main = main,
ylim = ylim, type = "n", ...)
panel(yh, r, ...)
if (one.fig)
title(sub = sub.caption, ...)
mtext(caption[1], 3, 0.25)
if (id.n > 0) {
y.id <- r[show.r]
y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3
text.id(yh[show.r], y.id, show.r, adj.x = TRUE)
}
abline(h = 0, lty = 3, col = "gray")
}
if (show[2]) {
ylim <- range(rs, na.rm = TRUE)
ylim[2] <- ylim[2] + diff(ylim) * 0.075
qq <- qqnorm(rs, main = main, ylab = ylab23, ylim = ylim,
...)
if (one.fig)
title(sub = sub.caption, ...)
mtext(caption[2], 3, 0.25)
if (id.n > 0)
text.id(qq$x[show.rs], qq$y[show.rs], show.rs, adj.x = TRUE)
}
if (show[3]) {
sqrtabsr <- sqrt(abs(rs))
ylim <- c(0, max(sqrtabsr, na.rm = TRUE))
yl <- as.expression(substitute(sqrt(abs(YL)), list(YL = as.name(ylab23))))
yhn0 <- if (is.null(w))
yh
else yh[w != 0]
plot(yhn0, sqrtabsr, xlab = l.fit, ylab = yl, main = main,
ylim = ylim, type = "n", ...)
panel(yhn0, sqrtabsr, ...)
if (one.fig)
title(sub = sub.caption, ...)
mtext(caption[3], 3, 0.25)
if (id.n > 0)
text.id(yhn0[show.rs], sqrtabsr[show.rs], show.rs,
adj.x = TRUE)
}
if (show[4]) {
if (id.n > 0) {
show.r <- order(-cook)[iid]
ymx <- cook[show.r[1]] * 1.075
}
else ymx <- max(cook)
plot(cook, type = "h", ylim = c(0, ymx), main = main,
xlab = "Obs. number", ylab = "Cook's distance", ...)
if (one.fig)
title(sub = sub.caption, ...)
mtext(caption[4], 3, 0.25)
if (id.n > 0)
text.id(show.r, cook[show.r], show.r, adj.x=FALSE)
}
if (show[5]) {
ylim <- range(rs, na.rm = TRUE)
hatval <- hatvalues(x)
if (id.n > 0) {
ylim <- ylim + c(-1, 1) * 0.08 * diff(ylim)
show.r <- order(-cook)[iid]
}
plot(hatval, rs, ylim = ylim, main = main,
xlab = "Leverages", ylab = ylab23,
type="n", ...)
panel(hatval, rs, ...)
if (one.fig)
title(sub = sub.caption, ...)
p <- length(coef(x))
for(crit in cook.levels){
curve(sqrt(crit*p*(1-x)/x), lty=2, add=T)
curve(-sqrt(crit*p*(1-x)/x), lty=2, add=T)
}
xmax <- par()$usr[2]
ymult <- sqrt(p*(1-xmax)/xmax)
aty <- c(-sqrt(rev(cook.levels))*ymult, sqrt(cook.levels)*ymult)
axis(4, at=aty, labels=paste(c(rev(cook.levels), cook.levels)),
mgp=c(.25,.25,0), las=2, tck=0, cex.axis=cex.id)
mtext(caption[5], 3, 0.25)
if (id.n > 0) {
y.id <- rs[show.r]
y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3
text(hatval[show.r], y.id, paste(show.r), pos=2, cex=cex.id, offset=0.25)
}
}
if (!one.fig && par("oma")[3] >= 1)
mtext(sub.caption, outer = TRUE, cex = 1.25)
invisible()
}
John Maindonald email: [EMAIL PROTECTED] phone : +61 2 (6125)3473 fax : +61 2(6125)5549 Centre for Bioinformation Science, Room 1194, John Dedman Mathematical Sciences Building (Building 27) Australian National University, Canberra ACT 0200.
______________________________________________ R-devel@stat.math.ethz.ch mailing list https://stat.ethz.ch/mailman/listinfo/r-devel