Changeset: 989e009a85f8 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=989e009a85f8
Modified Files:
clients/R/MonetDB.R/R/monetdb.R
clients/R/monet.frame/R/monetframe.R
Branch: default
Log Message:
R Connector: now uses new quantile() aggregate if available, ~5x speedup
diffs (108 lines):
diff --git a/clients/R/MonetDB.R/R/monetdb.R b/clients/R/MonetDB.R/R/monetdb.R
--- a/clients/R/MonetDB.R/R/monetdb.R
+++ b/clients/R/MonetDB.R/R/monetdb.R
@@ -779,14 +779,6 @@ REPLY_SIZE <- 100 # Apparently, -1 me
}
-.hasColFunc <- function(conn,func) {
- tryCatch({
- r <- dbSendQuery(conn,paste0("SELECT
",func,"(1);"))
- TRUE
- }, error = function(e) {
- FALSE
- })
-}
# copied from RMonetDB, no java-specific things in here...
# TODO: read first few rows with read.table and check types etc.
diff --git a/clients/R/monet.frame/R/monetframe.R
b/clients/R/monet.frame/R/monetframe.R
--- a/clients/R/monet.frame/R/monetframe.R
+++ b/clients/R/monet.frame/R/monetframe.R
@@ -191,6 +191,24 @@ as.vector.monet.frame <- av <- function(
l
}
+.hasColFunc <- function(conn,func) {
+ tryCatch({
+ r <- dbSendQuery(conn,paste0("SELECT
",func,"(1);"))
+ TRUE
+ }, error = function(e) {
+ FALSE
+ })
+}
+
+.hasColFuncParam2 <- function(conn,func,param) {
+ tryCatch({
+ r <- dbSendQuery(conn,paste0("SELECT
",func,"(1,",param,");"))
+ TRUE
+ }, error = function(e) {
+ FALSE
+ })
+}
+
.is.sequential <- function(x, eps=1e-8) {
if (length(x) && isTRUE(abs(x[1] - floor(x[1])) < eps)) {
all(abs(diff(x)-1) < eps)
@@ -838,7 +856,7 @@ mean.monet.frame <- avg.monet.frame <- f
conn <- attr(x,"conn")
nexpr <- NA
- if (func %in% c("min", "max",
"sum","avg","abs","sign","sqrt","floor","ceiling","exp","log","cos","sin","tan","acos","asin","atan","cosh","sinh","tanh","stddev_pop","stddev","prod","distinct"))
{
+ if (func %in% c("min", "max",
"sum","avg","abs","sign","sqrt","floor","ceiling","exp","log","cos","sin","tan","acos","asin","atan","cosh","sinh","tanh","stddev_pop","stddev","prod","distinct","quantile"))
{
nexpr <- paste0(toupper(func),"(",col,")")
}
if (func == "range") {
@@ -1111,29 +1129,42 @@ sort.monet.frame <- function (x, decreas
monet.frame.internal(conn,nquery,.is.debug(x),nrow.hint=nrow(x),ncol.hint=ncol(x),cnames.hint=names(x),rtypes.hint=rTypes(x))
}
-quantile.monet.frame <- function(x, probs = seq(0, 1, 0.25), na.rm = FALSE,
+quantile.monet.frame <- function(x, probs = seq(0, 1, 0.25), na.rm = FALSE,
names = TRUE, type = 7, printDots=FALSE, ...) {
+ if (any(probs < 0) || any(probs > 1)) stop("Quantiles must be in range
[0,1]")
if (ncol(x) != 1)
stop("quantile() only defined for one-column frames, consider
using $ first.")
isNum <- attr(x,"rtypes")[[1]] == "numeric"
if (!isNum)
stop("quantile() is only defined for numeric columns.")
if (na.rm) x <- .filter.na(x)
- n <- nrow(x)
- ret <- c()
- for (i in 1:length(probs)) {
- if (printDots) cat(".")
- index <- ceiling(probs[i]*n)+1
- if (index > n) index <- n
- # TODO: if prob = 0.5 use median()?
- y <- sort(x)[index,1,drop=FALSE]
- ret <- c(ret,as.vector(y)[[1]])
+ if (.hasColFuncParam2(attr(x,"conn"),"quantile",.5)) {
+ # make some effort to get all quantiles in a single call,
allows later optimization!
+ # TODO: move this to generic functions, like wrapSelect etc.
+ query <- getQuery(x)
+ col <- sub("(select )(.*?)(
from.*)","\\2",query,ignore.case=TRUE)
+ nexpr <- paste0("QUANTILE((",col,"),",probs,")",collapse=",")
+ nquery <- sub("select (.*?) from",paste0("SELECT ",nexpr,"
FROM"),query,ignore.case=TRUE)
+ if (.is.debug(x)) cat(paste0("EX: '",nquery,"'\n",sep=""))
+ ret <- as.vector(dbGetQuery(attr(x,"conn"),nquery)[1,])
+ }
+ else {
+ n <- nrow(x)
+ ret <- c()
+ for (i in 1:length(probs)) {
+ if (printDots) cat(".")
+ index <- ceiling(probs[i]*n)+1
+ if (index > n) index <- n
+ y <- sort(x)[index,1,drop=FALSE]
+ ret <- c(ret,as.vector(y)[[1]])
+ }
}
if (names) names(ret) <- paste0(as.integer(probs*100),"%")
ret
}
+
median.monet.frame <- function (x, na.rm = FALSE) {
# TODO: use median() here
quantile(x,0.5,na.rm=na.rm,names=FALSE)[[1]]
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list