Changeset: 83719c82c527 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=83719c82c527
Modified Files:
clients/R/MonetDB.R/NAMESPACE
clients/R/MonetDB.R/NEWS
clients/R/MonetDB.R/R/dplyr.R
clients/R/Tests/dplyr.R
Branch: default
Log Message:
R Connector: dplyr support for sample_n and sample_frac
diffs (74 lines):
diff --git a/clients/R/MonetDB.R/NAMESPACE b/clients/R/MonetDB.R/NAMESPACE
--- a/clients/R/MonetDB.R/NAMESPACE
+++ b/clients/R/MonetDB.R/NAMESPACE
@@ -28,4 +28,6 @@ export(db_insert_into.MonetDBConnection)
export(db_create_index.MonetDBConnection)
export(db_analyze.MonetDBConnection)
export(sql_subquery.MonetDBConnection)
-export(monetdb_queryinfo)
+export(monetdb_queryinfo)
+export(sample_n.tbl_monetdb)
+export(sample_frac.tbl_monetdb)
diff --git a/clients/R/MonetDB.R/NEWS b/clients/R/MonetDB.R/NEWS
--- a/clients/R/MonetDB.R/NEWS
+++ b/clients/R/MonetDB.R/NEWS
@@ -1,6 +1,7 @@
0.9.6
- Fixed non-ASCII character handling (thanks, Roman!)
- Fully removed C-based socket code
+- support for dplyr verbs sample_n() and sample_frac()
0.9.5
- Removed package date (Thanks, Dimitar)
diff --git a/clients/R/MonetDB.R/R/dplyr.R b/clients/R/MonetDB.R/R/dplyr.R
--- a/clients/R/MonetDB.R/R/dplyr.R
+++ b/clients/R/MonetDB.R/R/dplyr.R
@@ -19,7 +19,7 @@ src_translate_env.src_monetdb <- functio
}
src_desc.src_monetdb <- function(x) {
- paste0("MonetDB ",x$info$monet_version, " (",x$info$monet_release, ") [",
x$info$merovingian_uri,"]")
+ paste0("MonetDB ",x$info$monet_version, " (",x$info$monet_release, ")")
}
tbl.src_monetdb <- function(src, from, ...) {
@@ -27,6 +27,25 @@ tbl.src_monetdb <- function(src, from, .
dplyr::tbl_sql("monetdb", src = src, from = from, ...)
}
+sample_n.tbl_monetdb <- function(x, size, replace = FALSE, weight = NULL) {
+ if (replace || !is.null(weight)) {
+ stop("Sorry, replace and weight are not supported for MonetDB tables. \
+ Consider collect()'ing first.")
+ }
+ dbGetQuery(x$src$con, dplyr::build_sql(x$query$sql, " SAMPLE ",
as.integer(size)))
+}
+
+sample_frac.tbl_monetdb <- function(tbl, frac=1, replace = FALSE, weight =
NULL) {
+ if (frac < 0 || frac > 1) {
+ stop("frac must be in [0,1]")
+ }
+ n <- as.integer(round(dim(tbl)[[1]] * frac))
+ if (n < 1) {
+ stop("not sampling 0 rows...")
+ }
+ sample_n(tbl, n, replace, weight)
+}
+
db_query_fields.MonetDBConnection <- function(con, sql, ...) {
# prepare gives us column info without actually running a query. Nice.
dbGetQuery(con, dplyr::build_sql("PREPARE SELECT * FROM ", sql))$column
diff --git a/clients/R/Tests/dplyr.R b/clients/R/Tests/dplyr.R
--- a/clients/R/Tests/dplyr.R
+++ b/clients/R/Tests/dplyr.R
@@ -96,6 +96,10 @@ print(nrow(head(anti_join(player_info, h
}))
# TODO: set ops
+# sample functions
+print(nrow(sample_n(player_info, 10L)))
+print(nrow(head(sample_frac(player_info, .5), n=10L)))
+
# Arbitrary SQL -------------------------------------------------------------
# You can also provide sql as is, using the sql function:
batting2008 <- tbl(dps,
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list