This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new cc10a28 feat(r): Use classed warnings to signal that a lossy
conversion occurred (#298)
cc10a28 is described below
commit cc10a2857c25e57b83feac75e4654500fb450340
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Sep 19 22:28:39 2023 +0200
feat(r): Use classed warnings to signal that a lossy conversion occurred
(#298)
This lets a caller take appropriate action (or give an error, or give a
better warning) if a lossy conversion occurs.
Closes #297.
``` r
library(nanoarrow)
array <- as_nanoarrow_array(2^54, schema = na_int64())
convert_array(array, double())
#> Warning in convert_array.default(array, double()): 1 value(s) may have
incurred
#> loss of precision in conversion to double()
#> [1] 1.80144e+16
withCallingHandlers(
convert_array(array, double()),
nanoarrow_warning_lossy_conversion = function(x) {
warning(
"This is a better explanation of what happened!",
call. = conditionCall(x)
)
tryInvokeRestart("muffleWarning")
}
)
#> Warning in (function (x) : This is a better explanation of what happened!
#> [1] 1.80144e+16
```
<sup>Created on 2023-09-18 with [reprex
v2.0.2](https://reprex.tidyverse.org)</sup>
---
r/R/util.R | 9 +++++++++
r/src/as_array.c | 3 +--
r/src/materialize_common.h | 11 +++++++++++
r/src/materialize_dbl.h | 8 ++++----
r/src/materialize_int.h | 2 +-
r/src/materialize_int64.h | 2 +-
r/src/materialize_unspecified.h | 2 +-
r/tests/testthat/test-as-array.R | 2 +-
r/tests/testthat/test-convert-array.R | 8 ++++----
9 files changed, 33 insertions(+), 14 deletions(-)
diff --git a/r/R/util.R b/r/R/util.R
index 260f34a..15f0292 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -38,6 +38,15 @@ assert_arrow_installed <- function(reason) {
}
}
+warn_lossy_conversion <- function(count, msg) {
+ cnd <- simpleWarning(
+ sprintf("%d value(s) %s", count, msg),
+ call = sys.call(-1)
+ )
+ class(cnd) <- union("nanoarrow_warning_lossy_conversion", class(cnd))
+
+ warning(cnd)
+}
# Internally we use R_PreserveObject() and R_ReleaseObject() to manage R
objects
# that must be kept alive for ArrowArray buffers to stay valid. This count
diff --git a/r/src/as_array.c b/r/src/as_array.c
index 5098e23..afba1f2 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -250,8 +250,7 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
}
if (n_overflow > 0) {
- Rf_warning("%ld value(s) overflowed in double -> na_int32() creation",
- (long)n_overflow);
+ warn_lossy_conversion(n_overflow, "overflowed in double -> na_int32()
creation");
}
buffer->size_bytes = len * sizeof(int32_t);
diff --git a/r/src/materialize_common.h b/r/src/materialize_common.h
index 1be73a9..4f5c52f 100644
--- a/r/src/materialize_common.h
+++ b/r/src/materialize_common.h
@@ -23,6 +23,8 @@
#include "nanoarrow.h"
+#include "util.h"
+
// Vector types that have some special casing internally to avoid unnecessary
allocations
// or looping at the R level. Some of these types also need an SEXP ptype to
communicate
// additional information.
@@ -105,4 +107,13 @@ struct RConverter {
struct RConverter** children;
};
+static inline void warn_lossy_conversion(int64_t count, const char* msg) {
+ SEXP fun = PROTECT(Rf_install("warn_lossy_conversion"));
+ SEXP count_sexp = PROTECT(Rf_ScalarReal(count));
+ SEXP msg_sexp = PROTECT(Rf_mkString(msg));
+ SEXP call = PROTECT(Rf_lang3(fun, count_sexp, msg_sexp));
+ Rf_eval(call, nanoarrow_ns_pkg);
+ UNPROTECT(4);
+}
+
#endif
diff --git a/r/src/materialize_dbl.h b/r/src/materialize_dbl.h
index 9269d88..a69b4eb 100644
--- a/r/src/materialize_dbl.h
+++ b/r/src/materialize_dbl.h
@@ -91,7 +91,8 @@ static inline int nanoarrow_materialize_dbl(struct
RConverter* converter) {
for (R_xlen_t i = 0; i < dst->length; i++) {
double value = ArrowArrayViewGetDoubleUnsafe(src->array_view,
src->offset + i);
if (value > MAX_DBL_AS_INTEGER || value < -MAX_DBL_AS_INTEGER) {
- n_bad_values++;
+ // Content of null slot is undefined
+ n_bad_values += is_valid == NULL || ArrowBitGet(is_valid,
raw_src_offset + i);
}
result[dst->offset + i] = value;
@@ -112,9 +113,8 @@ static inline int nanoarrow_materialize_dbl(struct
RConverter* converter) {
}
if (n_bad_values > 0) {
- Rf_warning(
- "%ld value(s) may have incurred loss of precision in conversion to
double()",
- (long)n_bad_values);
+ warn_lossy_conversion(
+ n_bad_values, "may have incurred loss of precision in conversion to
double()");
}
return NANOARROW_OK;
diff --git a/r/src/materialize_int.h b/r/src/materialize_int.h
index 60ead5a..57dff58 100644
--- a/r/src/materialize_int.h
+++ b/r/src/materialize_int.h
@@ -131,7 +131,7 @@ static inline int nanoarrow_materialize_int(struct
ArrayViewSlice* src,
}
if (n_bad_values > 0) {
- Rf_warning("%ld value(s) outside integer range set to NA",
(long)n_bad_values);
+ warn_lossy_conversion(n_bad_values, "outside integer range set to NA");
}
return NANOARROW_OK;
diff --git a/r/src/materialize_int64.h b/r/src/materialize_int64.h
index f63617b..ad83671 100644
--- a/r/src/materialize_int64.h
+++ b/r/src/materialize_int64.h
@@ -120,7 +120,7 @@ static inline int nanoarrow_materialize_int64(struct
ArrayViewSlice* src,
}
if (n_bad_values > 0) {
- Rf_warning("%ld value(s) outside integer64 range set to NA",
(long)n_bad_values);
+ warn_lossy_conversion(n_bad_values, "outside integer64 range set to NA");
}
return NANOARROW_OK;
diff --git a/r/src/materialize_unspecified.h b/r/src/materialize_unspecified.h
index 72bcc23..40e622c 100644
--- a/r/src/materialize_unspecified.h
+++ b/r/src/materialize_unspecified.h
@@ -52,7 +52,7 @@ static inline int nanoarrow_materialize_unspecified(struct
ArrayViewSlice* src,
}
if (n_bad_values > 0) {
- Rf_warning("%ld non-null value(s) set to NA", (long)n_bad_values);
+ warn_lossy_conversion(n_bad_values, "that were non-null set to NA");
}
}
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
index e30c73f..52b144c 100644
--- a/r/tests/testthat/test-as-array.R
+++ b/r/tests/testthat/test-as-array.R
@@ -172,7 +172,7 @@ test_that("as_nanoarrow_array() works for double() ->
na_int32()", {
# With overflow
expect_warning(
as_nanoarrow_array(.Machine$integer.max + as.double(1:5), schema =
na_int32()),
- "5 value\\(s\\) overflowed"
+ class = "nanoarrow_warning_lossy_conversion"
)
})
diff --git a/r/tests/testthat/test-convert-array.R
b/r/tests/testthat/test-convert-array.R
index e2c3994..49265a8 100644
--- a/r/tests/testthat/test-convert-array.R
+++ b/r/tests/testthat/test-convert-array.R
@@ -208,7 +208,7 @@ test_that("convert to vector works for unspecified()", {
convert_array(array, vctrs::unspecified()),
vctrs::vec_cast(rep(NA, 10), vctrs::unspecified())
),
- "1 non-null value\\(s\\) set to NA"
+ class = "nanoarrow_warning_lossy_conversion"
)
})
@@ -391,13 +391,13 @@ test_that("convert to vector warns for invalid
integer()", {
array <- as_nanoarrow_array(.Machine$integer.max + 1)
expect_warning(
expect_identical(convert_array(array, integer()), NA_integer_),
- "1 value\\(s\\) outside integer range set to NA"
+ class = "nanoarrow_warning_lossy_conversion"
)
array <- as_nanoarrow_array(c(NA, .Machine$integer.max + 1))
expect_warning(
expect_identical(convert_array(array, integer()), c(NA_integer_,
NA_integer_)),
- "1 value\\(s\\) outside integer range set to NA"
+ class = "nanoarrow_warning_lossy_conversion"
)
})
@@ -519,7 +519,7 @@ test_that("convert to vector warns for possibly invalid
double()", {
array <- as_nanoarrow_array(2^54, schema = na_int64())
expect_warning(
convert_array(array, double()),
- "1 value\\(s\\) may have incurred loss of precision in conversion to
double()"
+ class = "nanoarrow_warning_lossy_conversion"
)
})