Changeset: 83719c82c527 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=83719c82c527
Modified Files:
        clients/R/MonetDB.R/NAMESPACE
        clients/R/MonetDB.R/NEWS
        clients/R/MonetDB.R/R/dplyr.R
        clients/R/Tests/dplyr.R
Branch: default
Log Message:

R Connector: dplyr support for sample_n and sample_frac


diffs (74 lines):

diff --git a/clients/R/MonetDB.R/NAMESPACE b/clients/R/MonetDB.R/NAMESPACE
--- a/clients/R/MonetDB.R/NAMESPACE
+++ b/clients/R/MonetDB.R/NAMESPACE
@@ -28,4 +28,6 @@ export(db_insert_into.MonetDBConnection)
 export(db_create_index.MonetDBConnection)
 export(db_analyze.MonetDBConnection)
 export(sql_subquery.MonetDBConnection)
-export(monetdb_queryinfo)
+export(monetdb_queryinfo) 
+export(sample_n.tbl_monetdb)
+export(sample_frac.tbl_monetdb)
diff --git a/clients/R/MonetDB.R/NEWS b/clients/R/MonetDB.R/NEWS
--- a/clients/R/MonetDB.R/NEWS
+++ b/clients/R/MonetDB.R/NEWS
@@ -1,6 +1,7 @@
 0.9.6
 - Fixed non-ASCII character handling (thanks, Roman!)
 - Fully removed C-based socket code
+- support for dplyr verbs sample_n() and sample_frac()
 
 0.9.5
 - Removed package date (Thanks, Dimitar)
diff --git a/clients/R/MonetDB.R/R/dplyr.R b/clients/R/MonetDB.R/R/dplyr.R
--- a/clients/R/MonetDB.R/R/dplyr.R
+++ b/clients/R/MonetDB.R/R/dplyr.R
@@ -19,7 +19,7 @@ src_translate_env.src_monetdb <- functio
 }
 
 src_desc.src_monetdb <- function(x) {
-  paste0("MonetDB ",x$info$monet_version, " (",x$info$monet_release, ") [", 
x$info$merovingian_uri,"]")
+  paste0("MonetDB ",x$info$monet_version, " (",x$info$monet_release, ")")
 }
 
 tbl.src_monetdb <- function(src, from, ...) {
@@ -27,6 +27,25 @@ tbl.src_monetdb <- function(src, from, .
   dplyr::tbl_sql("monetdb", src = src, from = from, ...)
 }
 
+sample_n.tbl_monetdb <- function(x, size, replace = FALSE, weight = NULL) {
+  if (replace || !is.null(weight)) {
+    stop("Sorry, replace and weight are not supported for MonetDB tables. \
+      Consider collect()'ing first.")
+  }
+  dbGetQuery(x$src$con, dplyr::build_sql(x$query$sql, " SAMPLE ", 
as.integer(size)))
+}
+
+sample_frac.tbl_monetdb <- function(tbl, frac=1, replace = FALSE, weight = 
NULL) {
+  if (frac < 0 || frac > 1) {
+    stop("frac must be in [0,1]")
+  }
+  n <- as.integer(round(dim(tbl)[[1]] * frac))
+  if (n < 1) {
+    stop("not sampling 0 rows...")
+  }
+  sample_n(tbl, n, replace, weight)
+}
+
 db_query_fields.MonetDBConnection <- function(con, sql, ...) {
   # prepare gives us column info without actually running a query. Nice.
   dbGetQuery(con, dplyr::build_sql("PREPARE SELECT * FROM ", sql))$column
diff --git a/clients/R/Tests/dplyr.R b/clients/R/Tests/dplyr.R
--- a/clients/R/Tests/dplyr.R
+++ b/clients/R/Tests/dplyr.R
@@ -96,6 +96,10 @@ print(nrow(head(anti_join(player_info, h
 }))
 # TODO: set ops
 
+# sample functions
+print(nrow(sample_n(player_info, 10L)))
+print(nrow(head(sample_frac(player_info, .5), n=10L)))
+
 # Arbitrary SQL -------------------------------------------------------------
 # You can also provide sql as is, using the sql function:
 batting2008 <- tbl(dps,
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to