Dear R-Developers, I think I found two bugs in max.col(). Ties between zeros are not broken, which might affect simulations. -Inf and Zero can be treated the same, which can give completely wrong results, e.g. when the second max is sought by replacing all maxs by -Inf.
To fix max.col I do offer the C-code behind my function rowMax(), which also handles NAs and seems to be faster. However, since max.col() is widely used and since I only occasionally program C, I'd appreciate if someone would do code-review and (again) thorough testing before publishing it. Please note that the code was developed under R 1.6.2 and might need porting to R 2.4.1 . Best regards Jens Oehlschlägel # -- output of max.col under 2.4.1. ------------------------ > x <- rbind(c(1,1), c(0,0), c(-Inf, 0), c(0, Inf), c(0, NA), c(NA, 0)) > rownames(x) <- paste("c(", apply(x, 1, paste, collapse=","), ")") > x [,1] [,2] c( 1,1 ) 1 1 c( 0,0 ) 0 0 c( -Inf,0 ) -Inf 0 c( 0,Inf ) 0 Inf c( 0,NA ) 0 NA c( NA,0 ) NA 0 > > cat("ties not broken for c(0,0)\n") ties not broken for c(0,0) > cat("erroneously ties broken for c(-Inf, 0)\n") erroneously ties broken for c(-Inf, 0) > tmp <- sapply(1:10, function(i)max.col(x, > ties.method="random"));rownames(tmp)<-rownames(x);tmp [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] c( 1,1 ) 2 1 2 2 1 1 1 2 1 1 c( 0,0 ) 2 2 2 2 2 2 2 2 2 2 c( -Inf,0 ) 2 1 1 2 2 1 2 1 1 1 c( 0,Inf ) 2 2 2 2 2 2 2 2 2 2 c( 0,NA ) NA NA NA NA NA NA NA NA NA NA c( NA,0 ) NA NA NA NA NA NA NA NA NA NA > cat("The following look good\n") The following look good > tmp <- sapply(1:10, function(i)max.col(x, > ties.method="first"));rownames(tmp)<-rownames(x);tmp [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] c( 1,1 ) 1 1 1 1 1 1 1 1 1 1 c( 0,0 ) 1 1 1 1 1 1 1 1 1 1 c( -Inf,0 ) 2 2 2 2 2 2 2 2 2 2 c( 0,Inf ) 2 2 2 2 2 2 2 2 2 2 c( 0,NA ) NA NA NA NA NA NA NA NA NA NA c( NA,0 ) NA NA NA NA NA NA NA NA NA NA > tmp <- sapply(1:10, function(i)max.col(x, > ties.method="last"));rownames(tmp)<-rownames(x);tmp [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] c( 1,1 ) 2 2 2 2 2 2 2 2 2 2 c( 0,0 ) 2 2 2 2 2 2 2 2 2 2 c( -Inf,0 ) 2 2 2 2 2 2 2 2 2 2 c( 0,Inf ) 2 2 2 2 2 2 2 2 2 2 c( 0,NA ) NA NA NA NA NA NA NA NA NA NA c( NA,0 ) NA NA NA NA NA NA NA NA NA NA > version _ platform i386-pc-mingw32 arch i386 os mingw32 system i386, mingw32 status major 2 minor 4.1 year 2006 month 12 day 18 svn rev 40228 language R version.string R version 2.4.1 (2006-12-18) # -- output of rowMax under 1.6.2 -------- > tmp <- sapply(1:10, function(i)rowMax(x, value=FALSE, na.rm=TRUE, > ties.method="random"));rownames(tmp)<-rownames(x);tmp [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] c( 1,1 ) 2 2 2 2 1 2 2 1 1 1 c( 0,0 ) 1 1 2 2 2 2 1 1 1 2 c( -Inf,0 ) 2 2 2 2 2 2 2 2 2 2 c( 0,Inf ) 2 2 2 2 2 2 2 2 2 2 c( 0,NA ) 1 1 1 1 1 1 1 1 1 1 c( NA,0 ) 2 2 2 2 2 2 2 2 2 2 > tmp <- sapply(1:10, function(i)rowMax(x, value=FALSE, na.rm=TRUE, > ties.method="first"));rownames(tmp)<-rownames(x);tmp [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] c( 1,1 ) 1 1 1 1 1 1 1 1 1 1 c( 0,0 ) 1 1 1 1 1 1 1 1 1 1 c( -Inf,0 ) 2 2 2 2 2 2 2 2 2 2 c( 0,Inf ) 2 2 2 2 2 2 2 2 2 2 c( 0,NA ) 1 1 1 1 1 1 1 1 1 1 c( NA,0 ) 2 2 2 2 2 2 2 2 2 2 > tmp <- sapply(1:10, function(i)rowMax(x, value=FALSE, na.rm=TRUE, > ties.method="last"));rownames(tmp)<-rownames(x);tmp [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] c( 1,1 ) 2 2 2 2 2 2 2 2 2 2 c( 0,0 ) 2 2 2 2 2 2 2 2 2 2 c( -Inf,0 ) 2 2 2 2 2 2 2 2 2 2 c( 0,Inf ) 2 2 2 2 2 2 2 2 2 2 c( 0,NA ) 1 1 1 1 1 1 1 1 1 1 c( NA,0 ) 2 2 2 2 2 2 2 2 2 2 # -- replication ---------- x <- rbind(c(1,1), c(0,0), c(-Inf, 0), c(0, Inf), c(0, NA), c(NA, 0)) rownames(x) <- paste("c(", apply(x, 1, paste, collapse=","), ")") x cat("ties not broken for c(0,0)\n") cat("erroneously ties broken for c(-Inf, 0)\n") tmp <- sapply(1:10, function(i)max.col(x, ties.method="random"));rownames(tmp)<-rownames(x);tmp cat("The following look good\n") tmp <- sapply(1:10, function(i)max.col(x, ties.method="first"));rownames(tmp)<-rownames(x);tmp tmp <- sapply(1:10, function(i)max.col(x, ties.method="last"));rownames(tmp)<-rownames(x);tmp # rowMax under R 1.6.2 tmp <- sapply(1:10, function(i)rowMax(x, value=FALSE, na.rm=TRUE, ties.method="random"));rownames(tmp)<-rownames(x);tmp tmp <- sapply(1:10, function(i)rowMax(x, value=FALSE, na.rm=TRUE, ties.method="first"));rownames(tmp)<-rownames(x);tmp tmp <- sapply(1:10, function(i)rowMax(x, value=FALSE, na.rm=TRUE, ties.method="last"));rownames(tmp)<-rownames(x);tmp # -- R 1.6.2 ----------- ## Copyright 2003-2007 Jens Oehlschlägel ## May be used (possibly modified) with name "max.col" under GPL ## Provided "as is" ## Use at own risk rowMax <- function(x, value=TRUE, index=NULL, na.rm=FALSE, ties.method = c("random", "first", "last"), use.names=FALSE){ if (!length(x)) stop("x has no length") d <- dim(x) if (!is.null(index) && !identical(d, dim(index))) stop("index has wrong dimension") if (length(d)!=2) stop("x must be a matrix") if (is.na(value) || !value){ ties.method <- pmatch(ties.method, c("random", "first", "last")) if (is.na(ties.method)) stop("illegal ties method") ret <- .C("R_row_maxwithindex_double" , x=as.double(x) , nr=d[1] , nc=d[2] , narm = as.integer(na.rm) , index=integer(d[1]) , tiesmethod=as.integer(ties.method) , NAOK = TRUE, DUP = FALSE, PACKAGE = "base")[c("index")] i <- cbind(seq(length=d[1]), ret$index) if (is.na(value)) ret$value <- x[i] if (!is.null(index)) ret$index <- index[i] if (is.na(value)){ if (use.names){ names(ret$index) <- dimnames(x)[1] names(ret$values) <- dimnames(x)[1] } return(ret) }else{ if (use.names){ names(ret$index) <- dimnames(x)[1] } return(ret$index) } }else{ ret <- .C("R_row_max_double" , x=as.double(x) , nr=d[1] , nc=d[2] , narm = as.integer(na.rm) , values=double(d[1]) , NAOK = TRUE, DUP = FALSE, PACKAGE = "base")["values"] if (use.names){ names(ret$values) <- dimnames(x)[1] } return(ret$values) } } # -- C -------------- // Author: Jens Oehlschlägel // Date: 3.3.2007 // Provided 'as is' under GPL // Use at own risk #include <R_ext/Arith.h> /* NA handling */ #include <Rmath.h> /* probably not needed */ #include <R_ext/Random.h> /* ..RNGstate */ #include <R_ext/Applic.h> /* NA handling */ #include <R_ext/Utils.h> /* probably not needed */ #define RELTOL 1e-5 void R_row_maxwithindex_double( double *x , int *nr , int *nc , int *narm , int *index , int *tiesmethod ) { int r, c, m, ntie, n_r = *nr, n_c = *nc, na_rm = *narm, ties_method = *tiesmethod; double a = 0; // to avoid uninitialized compiler warning double b, tol, small, absa; Rboolean isna; if (ties_method==1) GetRNGstate(); for (r = 0; r < n_r; r++) { /* first check row for any NAs and find the smallst entry */ small = R_PosInf; if (na_rm){ isna = TRUE; for (c = 0; c < n_c; c++){ a = x[r + c * n_r]; if (!ISNAN(a)) { isna = FALSE; absa = fabs(a); if (absa<small && absa!=0) small = absa; } } }else{ isna = FALSE; for (c = 0; c < n_c; c++) { a = x[r + c * n_r]; if (ISNAN(a)) { isna = TRUE; break; }else{ absa = fabs(a); if (absa<small && absa!=0) small = absa; } } } if (isna) { index[r] = NA_INTEGER; }else{ if (R_FINITE(small)) tol = RELTOL * small; else tol = 0; //Rprintf("small=%g tol=%g\n", small, tol); // the following loop has two parts // 1) find non-NA // 2) find extreme (and don't ask ISNAN anymore) m = NA_INTEGER; ntie = 1; for (c = 0; c < n_c; c++) { a = x[r + c * n_r]; if (!ISNAN(a)){ m = c; break; } } for (c++; c < n_c; c++) { b = x[r + c * n_r]; //Rprintf("a-tol=%g a=%g a+tol=%g b=%g\n", a-tol, a, a+tol, b); if (b > a + tol){ // MASS had >= here, which does not tie zeros and Inf ntie = 1; a = b; m = c; } else if (b >= a - tol) { if (ties_method==1){ ntie++; if (ntie * unif_rand() < 1.) m = c; }else if (ties_method==3){ m=c; } } } index[r] = m + 1; } } if (ties_method==1) PutRNGstate(); } void R_row_max_double(double *x, int *nr, int *nc, int *narm, double *values) { int r, c, n_r = *nr, n_c = *nc, na_rm = *narm; double e, extreme; Rboolean isna; for (r = 0; r < n_r; r++) { /* first check row for any NAs and find the largest entry */ extreme = R_NegInf; isna = FALSE; for (c = 0; c < n_c; c++) { e = x[r + c * n_r]; if (ISNAN(e)){ if (!na_rm){ isna = TRUE; break; } }else if (e>extreme){ extreme = e; } } if (isna) values[r] = NA_REAL; else values[r] = extreme; } } -- "Feel free" - 10 GB Mailbox, 100 FreeSMS/Monat ... Jetzt GMX TopMail testen: www.gmx.net/de/go/mailfooter/topmail-out ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel