Here is a revised version of notSorted; change argument order (to be more like is.unsorted) and fix blunder.
notSorted <- function(x, na.rm = FALSE, decreasing = FALSE, strict = FALSE){ # return TRUE if x is not sorted # If decreasing=FALSE, check for sort in increasing order # If strict=TRUE, ties correspond to not being sorted n <- length(x) if(n < 2) return(FALSE) if(!is.atomic(x) || (!na.rm && any(is.na(x)))) return(NA) if(na.rm && any(ii <- is.na(x))){ x <- x[!ii] n <- length(x) } if(decreasing){ ifelse1(strict, any(x[-1] >= x[-n]), any(x[-1] > x[-n])) } else { # check for sort in increasing order ifelse1(strict, any(x[-1] <= x[-n]), any(x[-1] < x[-n])) } } On Tue, Jul 1, 2008 at 3:23 PM, Tim Hesterberg <[EMAIL PROTECTED]> wrote: > There is a bug in the standard version of [.data.frame; > it mixes up handling duplicates and NAs when subscripting rows. > x <- data.frame(x=1:3, y=2:4, row.names=c("a","b","NA")) > y <- x[c(2:3, NA),] > y > It creates a data frame with duplicate rows, but won't print. > > In the previous message I included a version of [.data.frame; > it fails for the same example, for a different reason. Here > is a fix. > > > "subscript.data.frame" <- > function (x, i, j, > drop = if (missing(i)) TRUE else length(cols) == 1) > { > # This version of [.data.frame avoid wasting time enforcing unique > # row names if possible. > > mdrop <- missing(drop) > Narg <- nargs() - (!mdrop) > if (Narg < 3) { > if (!mdrop) > warning("drop argument will be ignored") > if (missing(i)) > return(x) > if (is.matrix(i)) > return(as.matrix(x)[i]) > y <- NextMethod("[") > cols <- names(y) > if (!is.null(cols) && any(is.na(cols))) > stop("undefined columns selected") > if (any(duplicated(cols))) > names(y) <- make.unique(cols) > return(structure(y, class = oldClass(x), > row.names = .row_names_info(x, 0L))) > } > if (missing(i)) { > if (missing(j) && drop && length(x) == 1L) > return(.subset2(x, 1L)) > y <- if (missing(j)) > x > else .subset(x, j) > if (drop && length(y) == 1L) > return(.subset2(y, 1L)) > cols <- names(y) > if (any(is.na(cols))) > stop("undefined columns selected") > if (any(duplicated(cols))) > names(y) <- make.unique(cols) > nrow <- .row_names_info(x, 2L) > if (drop && !mdrop && nrow == 1L) > return(structure(y, class = NULL, row.names = NULL)) > else return(structure(y, class = oldClass(x), > row.names = .row_names_info(x, 0L))) > } > xx <- x > cols <- names(xx) > x <- vector("list", length(x)) > x <- .Call("R_copyDFattr", xx, x, PACKAGE = "base") > oldClass(x) <- attr(x, "row.names") <- NULL > # Do not want to check for duplicates if don't need to > noDuplicateRowNames <- (is.logical(i) || > (!is.null(attr(x, "dup.row.names"))) || > (is.numeric(i) && min(i, 0, na.rm=TRUE) < 0) || > (!anyMissing(i) && !notSorted(i, strict = TRUE))) > > if (!missing(j)) { > x <- x[j] > cols <- names(x) > if (drop && length(x) == 1L) { > if (is.character(i)) { > rows <- attr(xx, "row.names") > i <- pmatch(i, rows, duplicates.ok = TRUE) > } > xj <- .subset2(.subset(xx, j), 1L) > return(if (length(dim(xj)) != 2L) xj[i] else xj[i, > , drop = FALSE]) > } > if (any(is.na(cols))) > stop("undefined columns selected") > nxx <- structure(seq_along(xx), names = names(xx)) > sxx <- match(nxx[j], seq_along(xx)) > } > else sxx <- seq_along(x) > rows <- NULL > if (is.character(i)) { > rows <- attr(xx, "row.names") > i <- pmatch(i, rows, duplicates.ok = TRUE) > } > for (j in seq_along(x)) { > xj <- xx[[sxx[j]]] > x[[j]] <- if (length(dim(xj)) != 2L) > xj[i] > else xj[i, , drop = FALSE] > } > if (drop) { > n <- length(x) > if (n == 1L) > return(x[[1L]]) > if (n > 1L) { > xj <- x[[1L]] > nrow <- if (length(dim(xj)) == 2L) > dim(xj)[1L] > else length(xj) > drop <- !mdrop && nrow == 1L > } > else drop <- FALSE > } > if (!drop) { > if (is.null(rows)) > rows <- attr(xx, "row.names") > rows <- rows[i] > if(any(is.na(rows))) > rows[is.na(rows)] <- "NA" > if(!noDuplicateRowNames && any(duplicated(rows))) > rows <- make.unique(as.character(rows)) > if (any(duplicated(nm <- names(x)))) > names(x) <- make.unique(nm) > if (is.null(rows)) > rows <- attr(xx, "row.names")[i] > attr(x, "row.names") <- rows > oldClass(x) <- oldClass(xx) > } > x > } > > # That requires anyMissing from the splus2R package, > # plus notSorted (or a version of is.unsorted with argument 'strict' > added). > > (first version of notSorted is omitted) > > > On Tue, Jul 1, 2008 at 11:20 AM, Tim Hesterberg <[EMAIL PROTECTED]> > wrote: > >> Below is a version of [.data.frame that is faster >> for subscripting rows of large data frames; it avoids calling >> duplicated(rows) >> if there is no need to check for duplicate row names, when: >> i is logical >> attr(x, "dup.row.names") is not NULL (S+ compatibility) >> i is numeric and negative >> i is strictly increasing >> > > [[alternative HTML version deleted]] ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel