Dear R-developeRs, Attached follows a patch against svn 34959 that adds the printing of p-values to the TukeyHSD.aov function in stats package. I also updated the corresponding documentation file and added a 'see also' reference to the simint function of the multcomp package.
As it was already brought up in a previous thread [1] in R-help, one can obtain the adjusted p-values using the multcomp package and its simint function. The problem is that currently the simint function scales very badly for a large number of contrasts (> 15). While the output of TukeyHSD is almost instantaneous, simint may take more than half an hour to process 19 contrasts. As a toy example, try: y <- rnorm(500) A <- gl(5,100) system.time(h1 <- TukeyHSD((aov(y ~ A)))) system.time(h2 <- simint(y ~ A,type="Tukey")) Here I got: [1] 0.09 0.01 0.10 0.00 0.00 [1] 26.87 0.03 27.10 0.00 0.00 For a small number of contrasts they're equivalent, for example: data(warpbreaks) fm1 <- aov(breaks ~ wool + tension, data = warpbreaks) tHSD <- TukeyHSD(fm1, "tension", ordered = FALSE) print(tHSD) mcHSD <- simint(breaks ~ wool + tension, data = warpbreaks, whichf="tension", type="Tukey") summary(mcHSD) I also attached the complete function (mTukeyHSD.R) to this message, in case the patch is not accepted and someone else needs to do the same thing. In any case, I think the reference to the multcomp package in the TukeyHSD help page should be considered even if the patch to the function is not accepted. Thank you, References [1] http://tolstoy.newcastle.edu.au/R/help/05/05/4599.html -- Fernando Henrique Ferraz P. da Rosa http://www.ime.usp.br/~feferraz
diff -ru r-devel/R/ r-local/R diff -ru r-devel/R/src/library/stats/R/TukeyHSD.R r-local/R/src/library/stats/R/TukeyHSD.R --- r-devel/R/src/library/stats/R/TukeyHSD.R 2005-07-14 23:13:49.000000000 -0300 +++ r-local/R/src/library/stats/R/TukeyHSD.R 2005-07-15 02:43:25.055207448 -0300 @@ -66,10 +66,12 @@ center <- center[keep] width <- qtukey(conf.level, length(means), x$df.residual) * sqrt((MSE/2) * outer(1/n, 1/n, "+"))[keep] - dnames <- list(NULL, c("diff", "lwr", "upr")) + est <- center/(sqrt((MSE/2) * outer(1/n, 1/n, "+"))[keep]) + pvals <- ptukey(abs(est),length(means),x$df.residual,lower.tail=FALSE) + dnames <- list(NULL, c("diff", "lwr", "upr","p adj")) if (!is.null(nms)) dnames[[1]] <- outer(nms, nms, paste, sep = "-")[keep] - out[[nm]] <- array(c(center, center - width, center + width), - c(length(width), 3), dnames) + out[[nm]] <- array(c(center, center - width, center + width,pvals), + c(length(width), 4), dnames) } class(out) <- c("multicomp", "TukeyHSD") attr(out, "orig.call") <- x$call diff -ru r-devel/R/src/library/stats/man/TukeyHSD.Rd r-local/R/src/library/stats/man/TukeyHSD.Rd --- r-devel/R/src/library/stats/man/TukeyHSD.Rd 2005-07-14 23:14:39.380653872 -0300 +++ r-local/R/src/library/stats/man/TukeyHSD.Rd 2005-07-15 03:14:36.520277744 -0300 @@ -55,7 +55,8 @@ A list with one component for each term requested in \code{which}. Each component is a matrix with columns \code{diff} giving the difference in the observed means, \code{lwr} giving the lower - end point of the interval, and \code{upr} giving the upper end point. + end point of the interval, \code{upr} giving the upper end point + and \code{p adj} giving the p-value. } \references{ Miller, R. G. (1981) @@ -69,7 +70,8 @@ Douglas Bates } \seealso{ - \code{\link{aov}}, \code{\link{qtukey}}, \code{\link{model.tables}} + \code{\link{aov}}, \code{\link{qtukey}}, \code{\link{model.tables}}, + \code{\link[multcomp]{simint}} } \examples{ summary(fm1 <- aov(breaks ~ wool + tension, data = warpbreaks))
### ### Tukey multiple comparisons for R ### ### Copyright 2000-2001 Douglas M. Bates <[EMAIL PROTECTED]> ### Modified for base R 2002 B. D. Ripley ### ### This file is made available under the terms of the GNU General ### Public License, version 2, or at your option, any later version, ### incorporated herein by reference. ### ### This program is distributed in the hope that it will be ### useful, but WITHOUT ANY WARRANTY; without even the implied ### warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ### PURPOSE. See the GNU General Public License for more ### details. ### ### You should have received a copy of the GNU General Public ### License along with this program; if not, write to the Free ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, ### MA 02111-1307, USA TukeyHSD <- function(x, which, ordered = FALSE, conf.level = 0.95, ...) UseMethod("TukeyHSD") TukeyHSD.aov <- function(x, which = seq(along = tabs), ordered = FALSE, conf.level = 0.95, ...) { mm <- model.tables(x, "means") if(is.null(mm$n)) stop("no factors in the fitted model") tabs <- mm$tables[-1] tabs <- tabs[which] ## mm$n need not be complete -- factors only -- so index by names nn <- mm$n[names(tabs)] nn_na <- is.na(nn) if(all(nn_na)) stop("'which' specified no factors") if(any(nn_na)) { warning("'which' specified some non-factors which will be dropped") tabs <- tabs[!nn_na] nn <- nn[!nn_na] } out <- vector("list", length(tabs)) names(out) <- names(tabs) MSE <- sum(resid(x)^2)/x$df.residual for (nm in names(tabs)) { tab <- tabs[[nm]] means <- as.vector(tab) nms <- if(length(d <- dim(tab)) > 1) { dn <- dimnames(tab) apply(do.call("expand.grid", dn), 1, paste, collapse=":") } else names(tab) n <- nn[[nm]] ## expand n to the correct length if necessary if (length(n) < length(means)) n <- rep.int(n, length(means)) if (as.logical(ordered)) { ord <- order(means) means <- means[ord] n <- n[ord] if (!is.null(nms)) nms <- nms[ord] } center <- outer(means, means, "-") keep <- lower.tri(center) center <- center[keep] width <- qtukey(conf.level, length(means), x$df.residual) * sqrt((MSE/2) * outer(1/n, 1/n, "+"))[keep] est <- center/(sqrt((MSE/2) * outer(1/n, 1/n, "+"))[keep]) pvals <- ptukey(abs(est),length(means),x$df.residual,lower.tail=FALSE) dnames <- list(NULL, c("diff", "lwr", "upr","p adj")) if (!is.null(nms)) dnames[[1]] <- outer(nms, nms, paste, sep = "-")[keep] out[[nm]] <- array(c(center, center - width, center + width,pvals), c(length(width), 4), dnames) } class(out) <- c("multicomp", "TukeyHSD") attr(out, "orig.call") <- x$call attr(out, "conf.level") <- conf.level attr(out, "ordered") <- ordered out } print.TukeyHSD <- function(x, ...) { cat(" Tukey multiple comparisons of means\n") cat(" ", format(100*attr(x, "conf.level"), 2), "% family-wise confidence level\n", sep="") if (attr(x, "ordered")) cat(" factor levels have been ordered\n") cat("\nFit: ", deparse(attr(x, "orig.call"), 500), "\n\n", sep="") attr(x, "orig.call") <- attr(x, "conf.level") <- attr(x, "ordered") <- NULL print.default(unclass(x), ...) } plot.TukeyHSD <- function (x, ...) { for (i in seq(along = x)) { xi <- x[[i]] yvals <- nrow(xi):1 plot(c(xi[, "lwr"], xi[, "upr"]), rep.int(yvals, 2), type = "n", axes = FALSE, xlab = "", ylab = "", ...) axis(1, ...) axis(2, at = nrow(xi):1, labels = dimnames(xi)[[1]], srt = 0, ...) abline(h = yvals, lty = 1, lwd = 0, col = "lightgray") abline(v = 0, lty = 2, lwd = 0, ...) segments(xi[, "lwr"], yvals, xi[, "upr"], yvals, ...) segments(as.vector(xi), rep.int(yvals - 0.1, 3), as.vector(xi), rep.int(yvals + 0.1, 3), ...) title(main = paste(format(100 * attr(x, "conf.level"), 2), "% family-wise confidence level\n", sep = ""), xlab = paste("Differences in mean levels of", names(x)[i])) box() } }
______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel