This is an automated email from the ASF dual-hosted git repository.

brycemecum pushed a commit to branch maint-19.0.1.1-r
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 453c1f4fdcca3b3b75ec158efcce5bb2c782fd72
Author: Jonathan Keane <[email protected]>
AuthorDate: Sat Apr 5 17:01:01 2025 -0500

    GH-45949: [R] Fix CRAN warnings for 19.0.1 about compiled code (#45951)
    
    This gets rid of `OBJECT`, `DATAPTR` has been replaced with `INTEGER()`, 
`REAL()`, etc. though strings are more complicated. I will fully admit that 
this C++ is stretching my comfort zone, so might include obviously wrong things!
    
    CI is currently failing, but I'm not totally sure yet if that means the 
code changes here are wrong or if maybe these allow us to have slightly 
different assumptions about materialization (see 
https://github.com/apache/arrow/pull/45951#discussion_r2019675135)
    
    I've also requested reviews broadly for folks I know have been around this 
code before, I appreciate any effort that y'all can spare 🙏
    
    * GitHub Issue: #45949
    
    Lead-authored-by: Jonathan Keane <[email protected]>
    Co-authored-by: Dewey Dunnington <[email protected]>
    Signed-off-by: Jonathan Keane <[email protected]>
---
 r/src/altrep.cpp               | 52 ++++++++++++++++++++++++++++--------------
 r/src/arrow_types.h            | 20 +++++++++++++++-
 r/src/r_to_arrow.cpp           | 12 +++++-----
 r/tests/testthat/test-altrep.R | 14 +++++-------
 4 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index 90a459e19c..a4adf58997 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -115,7 +115,7 @@ const std::shared_ptr<ChunkedArray>& GetChunkedArray(SEXP 
alt) {
 //        materialization is needed.
 // data2: starts as NULL, and becomes a standard R vector with the same
 //        data if necessary: if materialization is needed, e.g. if we need
-//        to access its data pointer, with DATAPTR().
+//        to access its data pointer, with INTEGER(), REAL(), etc.
 template <typename Impl>
 struct AltrepVectorBase {
   // store the Array as an external pointer in data1, mark as immutable
@@ -220,7 +220,14 @@ struct AltrepVectorPrimitive : public 
AltrepVectorBase<AltrepVectorPrimitive<sex
       SEXP copy = PROTECT(Rf_allocVector(sexp_type, size));
 
       // copy the data from the array, through Get_region
-      Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy)));
+      if constexpr (std::is_same_v<c_type, double>) {
+        Get_region(alt, 0, size, REAL(copy));
+      } else if constexpr (std::is_same_v<c_type, int>) {
+        Get_region(alt, 0, size, INTEGER(copy));
+      } else {
+        static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, 
int>,
+                      "ALTREP not implemented for this c_type");
+      }
 
       // store as data2, this is now considered materialized
       SetRepresentation(alt, copy);
@@ -269,13 +276,27 @@ struct AltrepVectorPrimitive : public 
AltrepVectorBase<AltrepVectorPrimitive<sex
     }
 
     // Otherwise we have to materialize and hand the pointer to data2
-    return DATAPTR(Materialize(alt));
+    if constexpr (std::is_same_v<c_type, double>) {
+      return REAL(Materialize(alt));
+    } else if constexpr (std::is_same_v<c_type, int>) {
+      return INTEGER(Materialize(alt));
+    } else {
+      static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, 
int>,
+                    "ALTREP not implemented for this c_type");
+    }
   }
 
   // The value at position i
   static c_type Elt(SEXP alt, R_xlen_t i) {
     if (IsMaterialized(alt)) {
-      return reinterpret_cast<c_type*>(DATAPTR(Representation(alt)))[i];
+      if constexpr (std::is_same_v<c_type, double>) {
+        return REAL(Representation(alt))[i];
+      } else if constexpr (std::is_same_v<c_type, int>) {
+        return INTEGER(Representation(alt))[i];
+      } else {
+        static_assert(std::is_same_v<c_type, double> || std::is_same_v<c_type, 
int>,
+                      "ALTREP not implemented for this c_type");
+      }
     }
 
     auto altrep_data =
@@ -531,7 +552,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> 
{
       SEXP copy = PROTECT(Rf_allocVector(INTSXP, size));
 
       // copy the data from the array, through Get_region
-      Get_region(alt, 0, size, reinterpret_cast<int*>(DATAPTR(copy)));
+      Get_region(alt, 0, size, INTEGER(copy));
 
       // store as data2, this is now considered materialized
       SetRepresentation(alt, copy);
@@ -552,7 +573,7 @@ struct AltrepFactor : public AltrepVectorBase<AltrepFactor> 
{
     return nullptr;
   }
 
-  static void* Dataptr(SEXP alt, Rboolean writeable) { return 
DATAPTR(Materialize(alt)); }
+  static void* Dataptr(SEXP alt, Rboolean writeable) { return 
INTEGER(Materialize(alt)); }
 
   static SEXP Duplicate(SEXP alt, Rboolean /* deep */) {
     // the representation integer vector
@@ -892,7 +913,9 @@ struct AltrepVectorString : public 
AltrepVectorBase<AltrepVectorString<Type>> {
     return s;
   }
 
-  static void* Dataptr(SEXP alt, Rboolean writeable) { return 
DATAPTR(Materialize(alt)); }
+  static void* Dataptr(SEXP alt, Rboolean writeable) {
+    return const_cast<void*>(DATAPTR_RO(Materialize(alt)));
+  }
 
   static SEXP Materialize(SEXP alt) {
     if (Base::IsMaterialized(alt)) {
@@ -931,7 +954,9 @@ struct AltrepVectorString : public 
AltrepVectorBase<AltrepVectorString<Type>> {
   }
 
   static const void* Dataptr_or_null(SEXP alt) {
-    if (Base::IsMaterialized(alt)) return DATAPTR(Representation(alt));
+    if (Base::IsMaterialized(alt)) {
+      return DATAPTR_RO(alt);
+    }
 
     // otherwise give up
     return nullptr;
@@ -1267,21 +1292,14 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) {
 
   if (TYPEOF(x) == INTSXP) {
     cpp11::writable::integers out(Rf_xlength(x));
-    int* ptr = reinterpret_cast<int*>(DATAPTR(x));
+    int* ptr = INTEGER(x);
     for (R_xlen_t i = 0; i < n; i++) {
       out[i] = ptr[i];
     }
     return out;
   } else if (TYPEOF(x) == REALSXP) {
     cpp11::writable::doubles out(Rf_xlength(x));
-    double* ptr = reinterpret_cast<double*>(DATAPTR(x));
-    for (R_xlen_t i = 0; i < n; i++) {
-      out[i] = ptr[i];
-    }
-    return out;
-  } else if (TYPEOF(x) == STRSXP) {
-    cpp11::writable::strings out(Rf_xlength(x));
-    SEXP* ptr = reinterpret_cast<SEXP*>(DATAPTR(x));
+    double* ptr = REAL(x);
     for (R_xlen_t i = 0; i < n; i++) {
       out[i] = ptr[i];
     }
diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h
index 05c8f6062d..9da02fd09f 100644
--- a/r/src/arrow_types.h
+++ b/r/src/arrow_types.h
@@ -173,7 +173,7 @@ template <typename RVector>
 class RBuffer : public MutableBuffer {
  public:
   explicit RBuffer(RVector vec)
-      : MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)),
+      : MutableBuffer(reinterpret_cast<uint8_t*>(getDataPointer(vec)),
                       vec.size() * sizeof(typename RVector::value_type),
                       arrow::CPUDevice::memory_manager(gc_memory_pool())),
         vec_(vec) {}
@@ -181,6 +181,24 @@ class RBuffer : public MutableBuffer {
  private:
   // vec_ holds the memory
   RVector vec_;
+
+  static void* getDataPointer(RVector& vec) {
+    if (TYPEOF(vec) == LGLSXP) {
+      return LOGICAL(vec);
+    } else if (TYPEOF(vec) == INTSXP) {
+      return INTEGER(vec);
+    } else if (TYPEOF(vec) == REALSXP) {
+      return REAL(vec);
+    } else if (TYPEOF(vec) == CPLXSXP) {
+      return COMPLEX(vec);
+    } else if (TYPEOF(vec) == STRSXP) {
+      // We don't want to expose the string data here, so we error
+      cpp11::stop("Operation not supported for string vectors.");
+    } else {
+      // raw
+      return RAW(vec);
+    }
+  }
 };
 
 std::shared_ptr<arrow::DataType> InferArrowTypeFromFactor(SEXP);
diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp
index d2db11e14a..32e186ceac 100644
--- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp
@@ -1214,11 +1214,11 @@ bool can_reuse_memory(SEXP x, const 
std::shared_ptr<arrow::DataType>& type) {
   //       because MakeSimpleArray below will force materialization
   switch (type->id()) {
     case Type::INT32:
-      return TYPEOF(x) == INTSXP && !OBJECT(x);
+      return TYPEOF(x) == INTSXP && !Rf_isObject(x);
     case Type::DOUBLE:
-      return TYPEOF(x) == REALSXP && !OBJECT(x);
+      return TYPEOF(x) == REALSXP && !Rf_isObject(x);
     case Type::INT8:
-      return TYPEOF(x) == RAWSXP && !OBJECT(x);
+      return TYPEOF(x) == RAWSXP && !Rf_isObject(x);
     case Type::INT64:
       return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64");
     default:
@@ -1412,17 +1412,17 @@ bool vector_from_r_memory(SEXP x, const 
std::shared_ptr<DataType>& type,
 
   switch (type->id()) {
     case Type::INT32:
-      return TYPEOF(x) == INTSXP && !OBJECT(x) &&
+      return TYPEOF(x) == INTSXP && !Rf_isObject(x) &&
              vector_from_r_memory_impl<cpp11::integers, Int32Type>(x, type, 
columns, j,
                                                                    tasks);
 
     case Type::DOUBLE:
-      return TYPEOF(x) == REALSXP && !OBJECT(x) &&
+      return TYPEOF(x) == REALSXP && !Rf_isObject(x) &&
              vector_from_r_memory_impl<cpp11::doubles, DoubleType>(x, type, 
columns, j,
                                                                    tasks);
 
     case Type::UINT8:
-      return TYPEOF(x) == RAWSXP && !OBJECT(x) &&
+      return TYPEOF(x) == RAWSXP && !Rf_isObject(x) &&
              vector_from_r_memory_impl<cpp11::raws, UInt8Type>(x, type, 
columns, j,
                                                                tasks);
 
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
index 50bd40988e..d1e90b6b59 100644
--- a/r/tests/testthat/test-altrep.R
+++ b/r/tests/testthat/test-altrep.R
@@ -170,7 +170,7 @@ test_that("element access methods for int32 ALTREP with 
nulls", {
   expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # because there are no nulls, DATAPTR() does not materialize
+  # because there are nulls, DATAPTR() does materialize
   expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
   expect_true(test_arrow_altrep_is_materialized(altrep))
 
@@ -193,7 +193,7 @@ test_that("element access methods for double ALTREP with 
nulls", {
   expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # because there are no nulls, DATAPTR() does not materialize
+  # because there are nulls, DATAPTR() does materialize
   expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
   expect_true(test_arrow_altrep_is_materialized(altrep))
 
@@ -244,14 +244,13 @@ test_that("element access methods for character ALTREP", {
   expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # DATAPTR() should always materialize for strings
-  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
+  # match() calls DATAPTR() internally which materializes the vector
+  match(altrep, c("1", "40", "999"))
   expect_true(test_arrow_altrep_is_materialized(altrep))
 
   # test element access after materialization
   expect_true(test_arrow_altrep_is_materialized(altrep))
   expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
-  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
 })
 
 test_that("element access methods for character ALTREP from large_utf8()", {
@@ -265,14 +264,13 @@ test_that("element access methods for character ALTREP 
from large_utf8()", {
   expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # DATAPTR() should always materialize for strings
-  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
+  # match() calls DATAPTR() internally which materializes the vector
+  match(altrep, c("1", "40", "999"))
   expect_true(test_arrow_altrep_is_materialized(altrep))
 
   # test element access after materialization
   expect_true(test_arrow_altrep_is_materialized(altrep))
   expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
-  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
 })
 
 test_that("empty vectors are not altrep", {

Reply via email to