Below is a version of [.data.frame that is faster for subscripting rows of large data frames; it avoids calling duplicated(rows) if there is no need to check for duplicate row names, when: i is logical attr(x, "dup.row.names") is not NULL (S+ compatibility) i is numeric and negative i is strictly increasing
"[.data.frame" <- function (x, i, j, drop = if (missing(i)) TRUE else length(cols) == 1) { # This version of [.data.frame avoid wasting time enforcing unique # row names. mdrop <- missing(drop) Narg <- nargs() - (!mdrop) if (Narg < 3) { if (!mdrop) warning("drop argument will be ignored") if (missing(i)) return(x) if (is.matrix(i)) return(as.matrix(x)[i]) y <- NextMethod("[") cols <- names(y) if (!is.null(cols) && any(is.na(cols))) stop("undefined columns selected") if (any(duplicated(cols))) names(y) <- make.unique(cols) return(structure(y, class = oldClass(x), row.names = .row_names_info(x, 0L))) } if (missing(i)) { if (missing(j) && drop && length(x) == 1L) return(.subset2(x, 1L)) y <- if (missing(j)) x else .subset(x, j) if (drop && length(y) == 1L) return(.subset2(y, 1L)) cols <- names(y) if (any(is.na(cols))) stop("undefined columns selected") if (any(duplicated(cols))) names(y) <- make.unique(cols) nrow <- .row_names_info(x, 2L) if (drop && !mdrop && nrow == 1L) return(structure(y, class = NULL, row.names = NULL)) else return(structure(y, class = oldClass(x), row.names = .row_names_info(x, 0L))) } xx <- x cols <- names(xx) x <- vector("list", length(x)) x <- .Call("R_copyDFattr", xx, x, PACKAGE = "base") oldClass(x) <- attr(x, "row.names") <- NULL # Do not want to check for duplicates if don't need to noDuplicateRowNames <- (is.logical(i) || (!is.null(attr(x, "dup.row.names"))) || (is.numeric(i) && min(i, 0, na.rm=TRUE) < 0) || (!notSorted(i, strict = TRUE))) if (!missing(j)) { x <- x[j] cols <- names(x) if (drop && length(x) == 1L) { if (is.character(i)) { rows <- attr(xx, "row.names") i <- pmatch(i, rows, duplicates.ok = TRUE) } xj <- .subset2(.subset(xx, j), 1L) return(if (length(dim(xj)) != 2L) xj[i] else xj[i, , drop = FALSE]) } if (any(is.na(cols))) stop("undefined columns selected") nxx <- structure(seq_along(xx), names = names(xx)) sxx <- match(nxx[j], seq_along(xx)) } else sxx <- seq_along(x) rows <- NULL if (is.character(i)) { rows <- attr(xx, "row.names") i <- pmatch(i, rows, duplicates.ok = TRUE) } for (j in seq_along(x)) { xj <- xx[[sxx[j]]] x[[j]] <- if (length(dim(xj)) != 2L) xj[i] else xj[i, , drop = FALSE] } if (drop) { n <- length(x) if (n == 1L) return(x[[1L]]) if (n > 1L) { xj <- x[[1L]] nrow <- if (length(dim(xj)) == 2L) dim(xj)[1L] else length(xj) drop <- !mdrop && nrow == 1L } else drop <- FALSE } if (!drop) { if (is.null(rows)) rows <- attr(xx, "row.names") rows <- rows[i] if ((ina <- any(is.na(rows))) | (dup <- !noDuplicateRowNames && any(duplicated(rows)))) { if (ina) rows[is.na(rows)] <- "NA" if (dup) rows <- make.unique(as.character(rows)) } if (any(duplicated(nm <- names(x)))) names(x) <- make.unique(nm) if (is.null(rows)) rows <- attr(xx, "row.names")[i] attr(x, "row.names") <- rows oldClass(x) <- oldClass(xx) } x } [[alternative HTML version deleted]] ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel