This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 79abb7362f GH-37842: [R] Implement infer_schema.data.frame() (#37843)
79abb7362f is described below
commit 79abb7362f671f484675b89f19566df861c45f6f
Author: Nic Crane <[email protected]>
AuthorDate: Thu Sep 28 21:40:45 2023 +0100
GH-37842: [R] Implement infer_schema.data.frame() (#37843)
### Rationale for this change
Users will be able to easily see the schema which their `data.frame` object
will have when it's converted into an Arrow table.
### What changes are included in this PR?
Implements `infer_schema()` method for `data.frame` objects.
Before:
``` r
library(arrow)
schema(mtcars)
#> Error in UseMethod("infer_schema"): no applicable method for
'infer_schema' applied to an object of class "data.frame"
```
After:
``` r
library(arrow)
schema(mtcars)
#> Schema
#> mpg: double
#> cyl: double
#> disp: double
#> hp: double
#> drat: double
#> wt: double
#> qsec: double
#> vs: double
#> am: double
#> gear: double
#> carb: double
#>
#> See $metadata for additional Schema metadata
```
### Are these changes tested?
Yes
### Are there any user-facing changes?
Yes
* Closes: #37842
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/NAMESPACE | 1 +
r/R/schema.R | 3 +++
r/tests/testthat/test-schema.R | 5 +++++
3 files changed, 9 insertions(+)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 21f88b4180..d49255f781 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -112,6 +112,7 @@ S3method(infer_schema,ArrowTabular)
S3method(infer_schema,Dataset)
S3method(infer_schema,RecordBatchReader)
S3method(infer_schema,arrow_dplyr_query)
+S3method(infer_schema,data.frame)
S3method(infer_type,ArrowDatum)
S3method(infer_type,Expression)
S3method(infer_type,blob)
diff --git a/r/R/schema.R b/r/R/schema.R
index 1ad18e3141..ac0604b2b3 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -285,6 +285,9 @@ infer_schema.Dataset <- function(x) x$schema
#' @export
infer_schema.arrow_dplyr_query <- function(x) implicit_schema(x)
+#' @export
+infer_schema.data.frame <- function(x) schema(!!!lapply(x, infer_type))
+
#' @export
names.Schema <- function(x) x$names
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index db91cee330..b1dc065929 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -295,9 +295,14 @@ test_that("schema name assignment", {
test_that("schema extraction", {
skip_if_not_available("dataset")
+
tbl <- arrow_table(example_data)
+ expect_equal(schema(example_data), tbl$schema)
expect_equal(schema(tbl), tbl$schema)
+ expect_equal(schema(data.frame(a = 1, a = "x", check.names = FALSE)),
schema(a = double(), a = string()))
+ expect_equal(schema(data.frame()), schema())
+
ds <- InMemoryDataset$create(example_data)
expect_equal(schema(ds), ds$schema)