This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new f489996e70 GH-38376 [R]: Add `dimnames` method to `Dataset` class
(#38377)
f489996e70 is described below
commit f489996e707dd136e41f1eb8ed963f707eed18ad
Author: Jonathan Keane <[email protected]>
AuthorDate: Sun Oct 22 12:18:33 2023 -0500
GH-38376 [R]: Add `dimnames` method to `Dataset` class (#38377)
### Rationale for this change
Add a `dimnames` method so that things like `colnames(ds)` work out of the
box on datasets. Inspired by dbplyr's lazy tbl implementation.
### What changes are included in this PR?
An additional `dimnames` S3 method for `Dataset` objects, registered in `r/NAMESPACE`.
### Are these changes tested?
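Yes, a new test in `r/tests/testthat/test-dataset.R` checks `dimnames()` and `colnames()` on a `Dataset`.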
### Are there any user-facing changes?
Yes, `dimnames()` and `colnames()` now work on `Dataset` objects.
* Closes: #38376
Authored-by: Jonathan Keane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/NAMESPACE | 1 +
r/R/dataset.R | 3 +++
r/tests/testthat/test-dataset.R | 6 ++++++
3 files changed, 10 insertions(+)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index dac2cbda9c..412d70ed22 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -101,6 +101,7 @@ S3method(dim,RecordBatchReader)
S3method(dim,StructArray)
S3method(dim,arrow_dplyr_query)
S3method(dimnames,ArrowTabular)
+S3method(dimnames,Dataset)
S3method(head,ArrowDatum)
S3method(head,ArrowTabular)
S3method(head,Dataset)
diff --git a/r/R/dataset.R b/r/R/dataset.R
index 2400d08393..682f6c1481 100644
--- a/r/R/dataset.R
+++ b/r/R/dataset.R
@@ -527,6 +527,9 @@ names.Dataset <- function(x) names(x$schema)
#' @export
dim.Dataset <- function(x) c(x$num_rows, x$num_cols)
+#' @export
+dimnames.Dataset <- function(x) list(NULL, names(x))
+
#' @export
c.Dataset <- function(...) Dataset$create(list(...))
diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index b7632084e4..7ea47db449 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -334,6 +334,12 @@ test_that("dim method returns the correct number of rows
and columns", {
expect_identical(dim(ds), c(20L, 7L))
})
+test_that("dimnames, colnames on Dataset objects", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ col_names <- c("int", "dbl", "lgl", "chr", "fct", "ts", "part")
+ expect_identical(dimnames(ds), list(NULL, col_names))
+ expect_identical(colnames(ds), col_names)
+})
test_that("dim() correctly determine numbers of rows and columns on
arrow_dplyr_query object", {
ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))