This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new cc10a28  feat(r): Use classed warnings to signal that a lossy 
conversion occurred (#298)
cc10a28 is described below

commit cc10a2857c25e57b83feac75e4654500fb450340
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Sep 19 22:28:39 2023 +0200

    feat(r): Use classed warnings to signal that a lossy conversion occurred 
(#298)
    
    This lets a caller take appropriate action (or give an error, or give a
    better warning) if a lossy conversion occurs.
    
    Closes #297.
    
    ``` r
    library(nanoarrow)
    
    array <- as_nanoarrow_array(2^54, schema = na_int64())
    
    convert_array(array, double())
    #> Warning in convert_array.default(array, double()): 1 value(s) may have 
incurred
    #> loss of precision in conversion to double()
    #> [1] 1.80144e+16
    
    withCallingHandlers(
      convert_array(array, double()),
      nanoarrow_warning_lossy_conversion = function(x) {
        warning(
          "This is a better explanation of what happened!",
          call. = conditionCall(x)
        )
        tryInvokeRestart("muffleWarning")
      }
    )
    #> Warning in (function (x) : This is a better explanation of what happened!
    #> [1] 1.80144e+16
    ```
    
    <sup>Created on 2023-09-18 with [reprex
    v2.0.2](https://reprex.tidyverse.org)</sup>
---
 r/R/util.R                            |  9 +++++++++
 r/src/as_array.c                      |  3 +--
 r/src/materialize_common.h            | 11 +++++++++++
 r/src/materialize_dbl.h               |  8 ++++----
 r/src/materialize_int.h               |  2 +-
 r/src/materialize_int64.h             |  2 +-
 r/src/materialize_unspecified.h       |  2 +-
 r/tests/testthat/test-as-array.R      |  2 +-
 r/tests/testthat/test-convert-array.R |  8 ++++----
 9 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/r/R/util.R b/r/R/util.R
index 260f34a..15f0292 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -38,6 +38,15 @@ assert_arrow_installed <- function(reason) {
   }
 }
 
+warn_lossy_conversion <- function(count, msg) {
+  cnd <- simpleWarning(
+    sprintf("%d value(s) %s", count, msg),
+    call = sys.call(-1)
+  )
+  class(cnd) <- union("nanoarrow_warning_lossy_conversion", class(cnd))
+
+  warning(cnd)
+}
 
 # Internally we use R_PreserveObject() and R_ReleaseObject() to manage R 
objects
 # that must be kept alive for ArrowArray buffers to stay valid. This count
diff --git a/r/src/as_array.c b/r/src/as_array.c
index 5098e23..afba1f2 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -250,8 +250,7 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* 
array, SEXP schema_xptr
     }
 
     if (n_overflow > 0) {
-      Rf_warning("%ld value(s) overflowed in double -> na_int32() creation",
-                 (long)n_overflow);
+      warn_lossy_conversion(n_overflow, "overflowed in double -> na_int32() 
creation");
     }
 
     buffer->size_bytes = len * sizeof(int32_t);
diff --git a/r/src/materialize_common.h b/r/src/materialize_common.h
index 1be73a9..4f5c52f 100644
--- a/r/src/materialize_common.h
+++ b/r/src/materialize_common.h
@@ -23,6 +23,8 @@
 
 #include "nanoarrow.h"
 
+#include "util.h"
+
 // Vector types that have some special casing internally to avoid unnecessary 
allocations
 // or looping at the R level. Some of these types also need an SEXP ptype to 
communicate
 // additional information.
@@ -105,4 +107,13 @@ struct RConverter {
   struct RConverter** children;
 };
 
+static inline void warn_lossy_conversion(int64_t count, const char* msg) {
+  SEXP fun = PROTECT(Rf_install("warn_lossy_conversion"));
+  SEXP count_sexp = PROTECT(Rf_ScalarReal(count));
+  SEXP msg_sexp = PROTECT(Rf_mkString(msg));
+  SEXP call = PROTECT(Rf_lang3(fun, count_sexp, msg_sexp));
+  Rf_eval(call, nanoarrow_ns_pkg);
+  UNPROTECT(4);
+}
+
 #endif
diff --git a/r/src/materialize_dbl.h b/r/src/materialize_dbl.h
index 9269d88..a69b4eb 100644
--- a/r/src/materialize_dbl.h
+++ b/r/src/materialize_dbl.h
@@ -91,7 +91,8 @@ static inline int nanoarrow_materialize_dbl(struct 
RConverter* converter) {
       for (R_xlen_t i = 0; i < dst->length; i++) {
         double value = ArrowArrayViewGetDoubleUnsafe(src->array_view, 
src->offset + i);
         if (value > MAX_DBL_AS_INTEGER || value < -MAX_DBL_AS_INTEGER) {
-          n_bad_values++;
+          // Content of null slot is undefined
+          n_bad_values += is_valid == NULL || ArrowBitGet(is_valid, 
raw_src_offset + i);
         }
 
         result[dst->offset + i] = value;
@@ -112,9 +113,8 @@ static inline int nanoarrow_materialize_dbl(struct 
RConverter* converter) {
   }
 
   if (n_bad_values > 0) {
-    Rf_warning(
-        "%ld value(s) may have incurred loss of precision in conversion to 
double()",
-        (long)n_bad_values);
+    warn_lossy_conversion(
+        n_bad_values, "may have incurred loss of precision in conversion to 
double()");
   }
 
   return NANOARROW_OK;
diff --git a/r/src/materialize_int.h b/r/src/materialize_int.h
index 60ead5a..57dff58 100644
--- a/r/src/materialize_int.h
+++ b/r/src/materialize_int.h
@@ -131,7 +131,7 @@ static inline int nanoarrow_materialize_int(struct 
ArrayViewSlice* src,
   }
 
   if (n_bad_values > 0) {
-    Rf_warning("%ld value(s) outside integer range set to NA", 
(long)n_bad_values);
+    warn_lossy_conversion(n_bad_values, "outside integer range set to NA");
   }
 
   return NANOARROW_OK;
diff --git a/r/src/materialize_int64.h b/r/src/materialize_int64.h
index f63617b..ad83671 100644
--- a/r/src/materialize_int64.h
+++ b/r/src/materialize_int64.h
@@ -120,7 +120,7 @@ static inline int nanoarrow_materialize_int64(struct 
ArrayViewSlice* src,
   }
 
   if (n_bad_values > 0) {
-    Rf_warning("%ld value(s) outside integer64 range set to NA", 
(long)n_bad_values);
+    warn_lossy_conversion(n_bad_values, "outside integer64 range set to NA");
   }
 
   return NANOARROW_OK;
diff --git a/r/src/materialize_unspecified.h b/r/src/materialize_unspecified.h
index 72bcc23..40e622c 100644
--- a/r/src/materialize_unspecified.h
+++ b/r/src/materialize_unspecified.h
@@ -52,7 +52,7 @@ static inline int nanoarrow_materialize_unspecified(struct 
ArrayViewSlice* src,
     }
 
     if (n_bad_values > 0) {
-      Rf_warning("%ld non-null value(s) set to NA", (long)n_bad_values);
+      warn_lossy_conversion(n_bad_values, "that were non-null set to NA");
     }
   }
 
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
index e30c73f..52b144c 100644
--- a/r/tests/testthat/test-as-array.R
+++ b/r/tests/testthat/test-as-array.R
@@ -172,7 +172,7 @@ test_that("as_nanoarrow_array() works for double() -> 
na_int32()", {
   # With overflow
   expect_warning(
     as_nanoarrow_array(.Machine$integer.max + as.double(1:5), schema = 
na_int32()),
-    "5 value\\(s\\) overflowed"
+    class = "nanoarrow_warning_lossy_conversion"
   )
 })
 
diff --git a/r/tests/testthat/test-convert-array.R 
b/r/tests/testthat/test-convert-array.R
index e2c3994..49265a8 100644
--- a/r/tests/testthat/test-convert-array.R
+++ b/r/tests/testthat/test-convert-array.R
@@ -208,7 +208,7 @@ test_that("convert to vector works for unspecified()", {
       convert_array(array, vctrs::unspecified()),
       vctrs::vec_cast(rep(NA, 10), vctrs::unspecified())
     ),
-    "1 non-null value\\(s\\) set to NA"
+    class = "nanoarrow_warning_lossy_conversion"
   )
 })
 
@@ -391,13 +391,13 @@ test_that("convert to vector warns for invalid 
integer()", {
   array <- as_nanoarrow_array(.Machine$integer.max + 1)
   expect_warning(
     expect_identical(convert_array(array, integer()), NA_integer_),
-    "1 value\\(s\\) outside integer range set to NA"
+    class = "nanoarrow_warning_lossy_conversion"
   )
 
   array <- as_nanoarrow_array(c(NA, .Machine$integer.max + 1))
   expect_warning(
     expect_identical(convert_array(array, integer()), c(NA_integer_, 
NA_integer_)),
-    "1 value\\(s\\) outside integer range set to NA"
+    class = "nanoarrow_warning_lossy_conversion"
   )
 })
 
@@ -519,7 +519,7 @@ test_that("convert to vector warns for possibly invalid 
double()", {
   array <- as_nanoarrow_array(2^54, schema = na_int64())
   expect_warning(
     convert_array(array, double()),
-    "1 value\\(s\\) may have incurred loss of precision in conversion to 
double()"
+    class = "nanoarrow_warning_lossy_conversion"
   )
 })
 

Reply via email to