This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 464bd7cf feat(r): Add reticulate/Python integration (#817)
464bd7cf is described below

commit 464bd7cf5135b59fb51c19ad4a8a5c2049df130d
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Oct 23 15:58:30 2025 -0500

    feat(r): Add reticulate/Python integration (#817)
    
    This PR adds the s3 generics required to interact with Python objects
    via reticulate. Thie relies on nanoarrow for Python, which also exposes
    primitives for interacting with capsules. In theory we wouldn't need
    nanoarrow for Python for the Python -> R case if we could create and
    consume PyCapsules but this is at least a good start!
    
    ``` r
    library(nanoarrow)
    library(reticulate)
    
    pa <- import("pyarrow", convert = FALSE)
    
    x <- pa$array(c(1, 2, 3))
    
    (array <- as_nanoarrow_array(x))
    #> <nanoarrow_array double[3]>
    #>  $ length    : int 3
    #>  $ null_count: int 0
    #>  $ offset    : int 0
    #>  $ buffers   :List of 2
    #>   ..$ :<nanoarrow_buffer validity<bool>[null] ``
    #>   ..$ :<nanoarrow_buffer data<double>[3][24 b]> `1 2 3`
    #>  $ dictionary: NULL
    #>  $ children  : list()
    (py_array <- r_to_py(array))
    #> nanoarrow.Array<double>[3]
    #> 1.0
    #> 2.0
    #> 3.0
    py_to_r(py_array)
    #> <nanoarrow_array double[3]>
    #>  $ length    : int 3
    #>  $ null_count: int 0
    #>  $ offset    : int 0
    #>  $ buffers   :List of 2
    #>   ..$ :<nanoarrow_buffer validity<bool>[null] ``
    #>   ..$ :<nanoarrow_buffer data<double>[3][24 b]> `1 2 3`
    #>  $ dictionary: NULL
    #>  $ children  : list()
    ```
    
    Closes #810.
---
 .github/workflows/r-check.yaml                     |   1 +
 r/DESCRIPTION                                      |   1 +
 r/NAMESPACE                                        |   4 +
 r/R/pkg-reticulate.R                               | 174 +++++++++++++++++++++
 r/R/zzz.R                                          |   7 +
 r/man/as_nanoarrow_schema.python.builtin.object.Rd |  63 ++++++++
 r/tests/testthat/test-pkg-reticulate.R             | 111 +++++++++++++
 7 files changed, 361 insertions(+)

diff --git a/.github/workflows/r-check.yaml b/.github/workflows/r-check.yaml
index aaa308b0..d9387234 100644
--- a/.github/workflows/r-check.yaml
+++ b/.github/workflows/r-check.yaml
@@ -81,6 +81,7 @@ jobs:
         env:
           ARROW_R_VERBOSE_TEST: "true"
           _R_CHECK_FORCE_SUGGESTS_: false
+          NANOARROW_R_TEST_RETICULATE: "true"
         with:
           upload-snapshots: true
           working-directory: r
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 8f94629c..5cb042ea 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -29,6 +29,7 @@ Suggests:
     dplyr,
     hms,
     jsonlite,
+    reticulate,
     rlang,
     testthat (>= 3.0.0),
     tibble,
diff --git a/r/NAMESPACE b/r/NAMESPACE
index a374be98..313d3b32 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -39,6 +39,7 @@ S3method(as_nanoarrow_array,list)
 S3method(as_nanoarrow_array,matrix)
 S3method(as_nanoarrow_array,nanoarrow_array)
 S3method(as_nanoarrow_array,nanoarrow_buffer)
+S3method(as_nanoarrow_array,python.builtin.object)
 S3method(as_nanoarrow_array,vctrs_unspecified)
 S3method(as_nanoarrow_array_extension,default)
 S3method(as_nanoarrow_array_extension,nanoarrow_extension_spec_vctrs)
@@ -54,6 +55,7 @@ S3method(as_nanoarrow_array_stream,default)
 S3method(as_nanoarrow_array_stream,nanoarrow_array)
 S3method(as_nanoarrow_array_stream,nanoarrow_array_stream)
 S3method(as_nanoarrow_array_stream,nanoarrow_vctr)
+S3method(as_nanoarrow_array_stream,python.builtin.object)
 S3method(as_nanoarrow_buffer,default)
 S3method(as_nanoarrow_buffer,nanoarrow_buffer)
 S3method(as_nanoarrow_schema,DataType)
@@ -61,6 +63,7 @@ S3method(as_nanoarrow_schema,Field)
 S3method(as_nanoarrow_schema,Schema)
 S3method(as_nanoarrow_schema,nanoarrow_schema)
 S3method(as_nanoarrow_schema,nanoarrow_vctr)
+S3method(as_nanoarrow_schema,python.builtin.object)
 S3method(c,nanoarrow_vctr)
 S3method(convert_array,default)
 S3method(convert_array,factor)
@@ -219,6 +222,7 @@ export(nanoarrow_with_zstd)
 export(read_nanoarrow)
 export(register_nanoarrow_extension)
 export(resolve_nanoarrow_extension)
+export(test_reticulate_with_nanoarrow)
 export(unregister_nanoarrow_extension)
 export(write_nanoarrow)
 importFrom(utils,getFromNamespace)
diff --git a/r/R/pkg-reticulate.R b/r/R/pkg-reticulate.R
new file mode 100644
index 00000000..5ca7608e
--- /dev/null
+++ b/r/R/pkg-reticulate.R
@@ -0,0 +1,174 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+#' Python integration via reticulate
+#'
+#' These functions enable Python wrapper objects created via reticulate to
+#' be used with any function that uses [as_nanoarrow_array()] or
+#' [as_nanoarrow_array_stream()] to accept generic "arrowable" input.
+#' Implementations for [reticulate::py_to_r()] and [reticulate::r_to_py()]
+#' are also included such that nanoarrow's array/schema/array stream objects
+#' can be passed as arguments to Python functions that would otherwise accept
+#' an object implementing the Arrow PyCapsule protocol.
+#'
+#' This implementation uses the
+#' [Arrow PyCapsule 
protocol](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html)
+#' to interpret an arbitrary Python object as an Arrow array/schema/array 
stream
+#' and produces Python objects that implement this protocol. This is currently
+#' implemented using the nanoarrow Python package which provides similar
+#' primitives for facilitating interchange in Python.
+#'
+#' @param x An Python object to convert
+#' @param schema A requested schema, which may or may not be honoured depending
+#'   on the capabilities of the producer
+#' @param ... Unused
+#'
+#' @returns
+#'   - `as_nanoarrow_schema()` returns an object of class nanoarrow_schema
+#'   - `as_nanoarrow_array()` returns an object of class nanoarrow_array
+#'   - `as_nanoarrow_array_stream()` returns an object of class
+#'     nanoarrow_array_stream.
+#' @export
+#'
+#' @examplesIf test_reticulate_with_nanoarrow()
+#' library(reticulate)
+#'
+#' py_require("nanoarrow")
+#'
+#' na <- import("nanoarrow", convert = FALSE)
+#' python_arrayish_thing <- na$Array(1:3, na_int32())
+#' as_nanoarrow_array(python_arrayish_thing)
+#'
+#' r_to_py(as_nanoarrow_array(1:3))
+as_nanoarrow_schema.python.builtin.object <- function(x, ...) {
+  na <- reticulate::import("nanoarrow", convert = FALSE)
+  c_schema <- na$c_schema(x)
+
+  schema_dst <- nanoarrow_allocate_schema()
+  nanoarrow_pointer_move(
+    reticulate::py_str(c_schema[["_addr"]]()),
+    schema_dst
+  )
+
+  schema_dst
+}
+
+#' @rdname as_nanoarrow_schema.python.builtin.object
+#' @export
+as_nanoarrow_array.python.builtin.object <- function(x, ..., schema = NULL) {
+  if (!is.null(schema)) {
+    schema <- reticulate::r_to_py(as_nanoarrow_schema(schema), convert = FALSE)
+  }
+
+  na <- reticulate::import("nanoarrow", convert = FALSE)
+  c_array <- na$c_array(x, schema)
+
+  schema_dst <- nanoarrow_allocate_schema()
+  array_dst <-  nanoarrow_allocate_array()
+  nanoarrow_pointer_move(
+    reticulate::py_str(c_array$schema[["_addr"]]()),
+    schema_dst
+  )
+  nanoarrow_pointer_move(
+    reticulate::py_str(c_array[["_addr"]]()),
+    array_dst
+  )
+
+  nanoarrow_array_set_schema(array_dst, schema_dst, validate = FALSE)
+  array_dst
+}
+
+#' @rdname as_nanoarrow_schema.python.builtin.object
+#' @export
+as_nanoarrow_array_stream.python.builtin.object <- function(x, ..., schema = 
NULL) {
+  if (!is.null(schema)) {
+    schema <- reticulate::r_to_py(as_nanoarrow_schema(schema), convert = FALSE)
+  }
+
+  na <- reticulate::import("nanoarrow", convert = FALSE)
+  c_array_stream <- na$c_array_stream(x, schema)
+
+  array_stream_dst <- nanoarrow_allocate_array_stream()
+  nanoarrow_pointer_move(
+    reticulate::py_str(c_array_stream[["_addr"]]()),
+    array_stream_dst
+  )
+
+  array_stream_dst
+}
+
+r_to_py.nanoarrow_schema <- function(x, convert = FALSE) {
+  na_c_schema <- reticulate::import("nanoarrow.c_schema", convert = FALSE)
+
+  out <- na_c_schema$allocate_c_schema()
+  out_addr <- reticulate::py_str(out[["_addr"]]())
+  nanoarrow_pointer_export(x, out_addr)
+
+  na <- reticulate::import("nanoarrow", convert = FALSE)
+  na$Schema(out)
+}
+
+r_to_py.nanoarrow_array <- function(x, convert = FALSE) {
+  na_c_array <- reticulate::import("nanoarrow.c_array", convert = FALSE)
+
+  out <- na_c_array$allocate_c_array()
+  out_addr <- reticulate::py_str(out[["_addr"]]())
+  out_schema_addr <- reticulate::py_str(out$schema[["_addr"]]())
+
+  nanoarrow_pointer_export(infer_nanoarrow_schema(x), out_schema_addr)
+  nanoarrow_pointer_export(x, out_addr)
+
+  na <- reticulate::import("nanoarrow", convert = FALSE)
+  na$Array(out)
+}
+
+r_to_py.nanoarrow_array_stream <- function(x, convert = FALSE) {
+  na_c_array_stream <- reticulate::import("nanoarrow.c_array_stream", convert 
= FALSE)
+
+  out <- na_c_array_stream$allocate_c_array_stream()
+  out_addr <- reticulate::py_str(out[["_addr"]]())
+  nanoarrow_pointer_export(x, out_addr)
+
+  na <- reticulate::import("nanoarrow", convert = FALSE)
+  na$ArrayStream(out)
+}
+
+py_to_r.nanoarrow.schema.Schema <- function(x) {
+  as_nanoarrow_schema(x)
+}
+
+py_to_r.nanoarrow.array.Array <- function(x) {
+  as_nanoarrow_array(x)
+}
+
+py_to_r.nanoarrow.array_stream.ArrayStream <- function(x) {
+  as_nanoarrow_array_stream(x)
+}
+
+#' @rdname as_nanoarrow_schema.python.builtin.object
+#' @export
+test_reticulate_with_nanoarrow <- function() {
+  identical(Sys.getenv("NANOARROW_R_TEST_RETICULATE"), "true") &&
+    packageVersion("reticulate") >= "1.43.0"
+}
+
+has_reticulate_with_nanoarrow <- function() {
+  requireNamespace("reticulate", quietly = TRUE) &&
+    reticulate::py_available() &&
+    !inherits(try(reticulate::import("nanoarrow"), silent = TRUE), "try-error")
+}
diff --git a/r/R/zzz.R b/r/R/zzz.R
index af7522ce..9d271fc9 100644
--- a/r/R/zzz.R
+++ b/r/R/zzz.R
@@ -31,6 +31,13 @@
   s3_register("arrow::as_arrow_table", "nanoarrow_array_stream")
   s3_register("arrow::as_record_batch_reader", "nanoarrow_array_stream")
 
+  s3_register("reticulate::r_to_py", "nanoarrow_schema")
+  s3_register("reticulate::r_to_py", "nanoarrow_array")
+  s3_register("reticulate::r_to_py", "nanoarrow_array_stream")
+  s3_register("reticulate::py_to_r", "nanoarrow.schema.Schema")
+  s3_register("reticulate::py_to_r", "nanoarrow.array.Array")
+  s3_register("reticulate::py_to_r", "nanoarrow.array_stream.ArrayStream")
+
   s3_register("tibble::as_tibble", "nanoarrow_array")
 }
 
diff --git a/r/man/as_nanoarrow_schema.python.builtin.object.Rd 
b/r/man/as_nanoarrow_schema.python.builtin.object.Rd
new file mode 100644
index 00000000..e2b0243e
--- /dev/null
+++ b/r/man/as_nanoarrow_schema.python.builtin.object.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pkg-reticulate.R
+\name{as_nanoarrow_schema.python.builtin.object}
+\alias{as_nanoarrow_schema.python.builtin.object}
+\alias{as_nanoarrow_array.python.builtin.object}
+\alias{as_nanoarrow_array_stream.python.builtin.object}
+\alias{test_reticulate_with_nanoarrow}
+\title{Python integration via reticulate}
+\usage{
+\method{as_nanoarrow_schema}{python.builtin.object}(x, ...)
+
+\method{as_nanoarrow_array}{python.builtin.object}(x, ..., schema = NULL)
+
+\method{as_nanoarrow_array_stream}{python.builtin.object}(x, ..., schema = 
NULL)
+
+test_reticulate_with_nanoarrow()
+}
+\arguments{
+\item{x}{An Python object to convert}
+
+\item{...}{Unused}
+
+\item{schema}{A requested schema, which may or may not be honoured depending
+on the capabilities of the producer}
+}
+\value{
+\itemize{
+\item \code{as_nanoarrow_schema()} returns an object of class nanoarrow_schema
+\item \code{as_nanoarrow_array()} returns an object of class nanoarrow_array
+\item \code{as_nanoarrow_array_stream()} returns an object of class
+nanoarrow_array_stream.
+}
+}
+\description{
+These functions enable Python wrapper objects created via reticulate to
+be used with any function that uses 
\code{\link[=as_nanoarrow_array]{as_nanoarrow_array()}} or
+\code{\link[=as_nanoarrow_array_stream]{as_nanoarrow_array_stream()}} to 
accept generic "arrowable" input.
+Implementations for 
\code{\link[reticulate:r-py-conversion]{reticulate::py_to_r()}} and 
\code{\link[reticulate:r-py-conversion]{reticulate::r_to_py()}}
+are also included such that nanoarrow's array/schema/array stream objects
+can be passed as arguments to Python functions that would otherwise accept
+an object implementing the Arrow PyCapsule protocol.
+}
+\details{
+This implementation uses the
+\href{https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html}{Arrow
 PyCapsule protocol}
+to interpret an arbitrary Python object as an Arrow array/schema/array stream
+and produces Python objects that implement this protocol. This is currently
+implemented using the nanoarrow Python package which provides similar
+primitives for facilitating interchange in Python.
+}
+\examples{
+\dontshow{if (test_reticulate_with_nanoarrow()) withAutoprint(\{ # examplesIf}
+library(reticulate)
+
+py_require("nanoarrow")
+
+na <- import("nanoarrow", convert = FALSE)
+python_arrayish_thing <- na$Array(1:3, na_int32())
+as_nanoarrow_array(python_arrayish_thing)
+
+r_to_py(as_nanoarrow_array(1:3))
+\dontshow{\}) # examplesIf}
+}
diff --git a/r/tests/testthat/test-pkg-reticulate.R 
b/r/tests/testthat/test-pkg-reticulate.R
new file mode 100644
index 00000000..031a0e0c
--- /dev/null
+++ b/r/tests/testthat/test-pkg-reticulate.R
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if (test_reticulate_with_nanoarrow()) {
+  reticulate::py_available(initialize = TRUE)
+  reticulate::py_require("nanoarrow")
+}
+
+test_that("as_nanoarrow_schema() for Python object", {
+  skip_if_not(has_reticulate_with_nanoarrow())
+
+  na <- reticulate::import("nanoarrow")
+
+  expect_identical(as_nanoarrow_schema(na$binary())$format, "z")
+})
+
+test_that("as_nanoarrow_array() for Python object", {
+  skip_if_not(has_reticulate_with_nanoarrow())
+
+  na <- reticulate::import("nanoarrow")
+
+  array <- as_nanoarrow_array(na$Array(1:5, na_int32()))
+  expect_identical(
+    convert_array(array),
+    1:5
+  )
+
+  # Check schema request argument
+  lst <- reticulate::py_eval('[1, 2, 3, 4, 5]', convert = FALSE)
+  array <- as_nanoarrow_array(lst, schema = na_int32())
+  expect_identical(
+    convert_array(array),
+    1:5
+  )
+})
+
+test_that("as_nanoarrow_stream() for Python object", {
+  skip_if_not(has_reticulate_with_nanoarrow())
+
+  na <- reticulate::import("nanoarrow")
+
+  stream <- as_nanoarrow_array_stream(na$ArrayStream(1:5, na_int32()))
+  expect_identical(
+    convert_array_stream(stream),
+    1:5
+  )
+
+  # Check schema request argument
+  lst <- reticulate::py_eval('[1, 2, 3, 4, 5]', convert = FALSE)
+  stream <- as_nanoarrow_array_stream(lst, schema = na_int32())
+  expect_identical(
+    convert_array_stream(stream),
+    1:5
+  )
+})
+
+test_that("schemas can be converted to Python and back", {
+  skip_if_not(has_reticulate_with_nanoarrow())
+
+  py_schema <- reticulate::r_to_py(na_binary())
+  expect_s3_class(py_schema, "nanoarrow.schema.Schema")
+  r_schema <- reticulate::py_to_r(py_schema)
+  expect_identical(r_schema$format, "z")
+})
+
+test_that("arrays can be converted to Python and back", {
+  skip_if_not(has_reticulate_with_nanoarrow())
+
+  py_array <- reticulate::r_to_py(as_nanoarrow_array(1:5))
+  expect_s3_class(py_array, "nanoarrow.array.Array")
+  expect_identical(reticulate::py_to_r(py_array$to_pylist()), 1:5)
+  r_array <- reticulate::py_to_r(py_array)
+  expect_identical(convert_array(r_array), 1:5)
+})
+
+test_that("arrays can be converted to Python and back", {
+  skip_if_not(has_reticulate_with_nanoarrow())
+
+  stream <- basic_array_stream(list(1:5, 6:10, 11:15))
+  expect_identical(
+    convert_array(stream$get_next()),
+    1:5
+  )
+
+  py_stream <- reticulate::r_to_py(stream)
+  expect_s3_class(py_stream, "nanoarrow.array_stream.ArrayStream")
+  expect_identical(
+    convert_array(reticulate::py_to_r(py_stream$read_next())),
+    6:10
+  )
+
+  r_stream <- reticulate::py_to_r(py_stream)
+  expect_identical(
+    convert_array(r_stream$get_next()),
+    11:15
+  )
+})

Reply via email to