This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9447c63 fix: Explicit `stringsAsFactors = FALSE` for R <= 3.6 (#135)
9447c63 is described below
commit 9447c63715d4ec371b81d6bcb5d34b1a869efd2c
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Mar 1 09:45:24 2023 -0400
fix: Explicit `stringsAsFactors = FALSE` for R <= 3.6 (#135)
Closes #131.
---
.github/workflows/r-check.yaml | 10 ++-
r/DESCRIPTION | 2 +-
r/tests/testthat/test-altrep.R | 2 +-
r/tests/testthat/test-array.R | 4 +-
r/tests/testthat/test-convert-array-stream.R | 5 +-
r/tests/testthat/test-convert-array.R | 17 +++--
r/tests/testthat/test-infer-ptype.R | 6 +-
r/tests/testthat/test-util.R | 10 ++-
src/nanoarrow/array.c | 4 +-
src/nanoarrow/nanoarrow.h | 4 +-
src/nanoarrow/schema.c | 97 ++++++++++++----------------
src/nanoarrow/utils.c | 4 +-
12 files changed, 87 insertions(+), 78 deletions(-)
diff --git a/.github/workflows/r-check.yaml b/.github/workflows/r-check.yaml
index 3d3ea01..85ba356 100644
--- a/.github/workflows/r-check.yaml
+++ b/.github/workflows/r-check.yaml
@@ -19,9 +19,11 @@ name: test-r
on:
push:
- branches: main
+ branches:
+ - main
pull_request:
- branches: main
+ branches:
+ - main
paths:
- '.github/workflows/r-check.yaml'
- 'src/nanoarrow/**'
@@ -38,6 +40,7 @@ jobs:
matrix:
config:
- {os: macOS-latest, r: 'release'}
+ - {os: windows-latest, r: '3.6'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
@@ -70,13 +73,14 @@ jobs:
- uses: r-lib/actions/setup-r-dependencies@v2
with:
- extra-packages: any::rcmdcheck
+ extra-packages: any::rcmdcheck, arrow=?ignore-before-r=4.0.0
needs: check
working-directory: r
- uses: r-lib/actions/check-r-package@v2
env:
ARROW_R_VERBOSE_TEST: "true"
+ _R_CHECK_FORCE_SUGGESTS_: false
with:
upload-snapshots: true
working-directory: r
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 5707ee0..67cc9aa 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -22,7 +22,7 @@ RoxygenNote: 7.2.3
URL: https://github.com/apache/arrow-nanoarrow
BugReports: https://github.com/apache/arrow-nanoarrow/issues
Suggests:
- arrow (>= 10.0.0),
+ arrow (>= 9.0.0),
blob,
hms,
rlang,
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
index 821fc65..0658c76 100644
--- a/r/tests/testthat/test-altrep.R
+++ b/r/tests/testthat/test-altrep.R
@@ -72,7 +72,7 @@ test_that("nanoarrow_altrep_chr_force_materialize() forces
materialization", {
expect_identical(nanoarrow_altrep_force_materialize(x_altrep), 1L)
x <- as_nanoarrow_array(letters, schema = na_string())
- x_altrep_df <- data.frame(x = nanoarrow_altrep_chr(x))
+ x_altrep_df <- data.frame(x = nanoarrow_altrep_chr(x), stringsAsFactors =
FALSE)
expect_identical(
nanoarrow_altrep_force_materialize(x_altrep_df, recursive = FALSE),
0L
diff --git a/r/tests/testthat/test-array.R b/r/tests/testthat/test-array.R
index 8b23ce7..7b9c113 100644
--- a/r/tests/testthat/test-array.R
+++ b/r/tests/testthat/test-array.R
@@ -100,7 +100,7 @@ test_that("schemaless array list interface works for
non-nested types", {
})
test_that("schemaless array list interface works for nested types", {
- array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+ array <- as_nanoarrow_array(data.frame(a = 1L, b = "two", stringsAsFactors =
FALSE))
nanoarrow_array_set_schema(array, NULL)
expect_length(array$children, 2L)
@@ -203,7 +203,7 @@ test_that("array list interface classes offset buffers for
relevant types", {
})
test_that("array list interface works for nested types", {
- array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+ array <- as_nanoarrow_array(data.frame(a = 1L, b = "two", stringsAsFactors =
FALSE))
expect_named(array$children, c("a", "b"))
expect_s3_class(array$children[[1]], "nanoarrow_array")
diff --git a/r/tests/testthat/test-convert-array-stream.R
b/r/tests/testthat/test-convert-array-stream.R
index 404a43f..9e07124 100644
--- a/r/tests/testthat/test-convert-array-stream.R
+++ b/r/tests/testthat/test-convert-array-stream.R
@@ -99,7 +99,10 @@ test_that("convert array stream works for nested
data.frames", {
})
test_that("convert array stream works for struct-style vectors", {
- raw_posixlt <- as.data.frame(unclass(as.POSIXlt("2021-01-01", tz =
"America/Halifax")))
+ raw_posixlt <- as.data.frame(
+ unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")),
+ stringsAsFactors = FALSE
+ )
stream <- as_nanoarrow_array_stream(raw_posixlt)
expect_identical(
diff --git a/r/tests/testthat/test-convert-array.R
b/r/tests/testthat/test-convert-array.R
index 2f21ad1..4175c10 100644
--- a/r/tests/testthat/test-convert-array.R
+++ b/r/tests/testthat/test-convert-array.R
@@ -61,7 +61,7 @@ test_that("convert_array() errors for unsupported array", {
})
test_that("convert to vector works for data.frame", {
- df <- data.frame(a = 1L, b = "two", c = 3, d = TRUE)
+ df <- data.frame(a = 1L, b = "two", c = 3, d = TRUE, stringsAsFactors =
FALSE)
array <- as_nanoarrow_array(df)
expect_identical(convert_array(array, NULL), df)
@@ -80,10 +80,12 @@ test_that("convert to vector works for data.frame", {
})
test_that("convert to vector works for partial_frame", {
- array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+ array <- as_nanoarrow_array(
+ data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
+ )
expect_identical(
convert_array(array, vctrs::partial_frame()),
- data.frame(a = 1L, b = "two")
+ data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
)
})
@@ -108,7 +110,9 @@ test_that("convert to vector works for function()", {
})
test_that("convert to vector works for tibble", {
- array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+ array <- as_nanoarrow_array(
+ data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
+ )
expect_identical(
convert_array(array, tibble::tibble(a = integer(), b = character())),
tibble::tibble(a = 1L, b = "two")
@@ -135,7 +139,10 @@ test_that("convert to vector works for struct-style
vectors", {
array <- as_nanoarrow_array(as.POSIXlt("2021-01-01", tz = "America/Halifax"))
expect_identical(
convert_array(array),
- as.data.frame(unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")))
+ as.data.frame(
+ unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")),
+ stringsAsFactors = FALSE
+ )
)
array <- as_nanoarrow_array(as.POSIXlt("2021-01-01", tz = "America/Halifax"))
diff --git a/r/tests/testthat/test-infer-ptype.R
b/r/tests/testthat/test-infer-ptype.R
index 2fe3c96..920d9d8 100644
--- a/r/tests/testthat/test-infer-ptype.R
+++ b/r/tests/testthat/test-infer-ptype.R
@@ -63,8 +63,10 @@ test_that("infer_nanoarrow_ptype() works for basic types", {
)
expect_identical(
- infer_nanoarrow_ptype(as_nanoarrow_array(data.frame(x = character()))),
- data.frame(x = character())
+ infer_nanoarrow_ptype(
+ as_nanoarrow_array(data.frame(x = character(), stringsAsFactors = FALSE))
+ ),
+ data.frame(x = character(), stringsAsFactors = FALSE)
)
})
diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R
index 93b313b..c1bdc8e 100644
--- a/r/tests/testthat/test-util.R
+++ b/r/tests/testthat/test-util.R
@@ -56,7 +56,13 @@ test_that("new_data_frame() works", {
})
test_that("vector fuzzers work", {
- ptype <- data.frame(a = logical(), b = integer(), c = double(), d =
character())
+ ptype <- data.frame(
+ a = logical(),
+ b = integer(),
+ c = double(),
+ d = character(),
+ stringsAsFactors = FALSE
+ )
df_gen <- vec_gen(ptype, n = 123)
expect_identical(nrow(df_gen), 123L)
@@ -66,7 +72,7 @@ test_that("vector fuzzers work", {
})
test_that("vector shuffler works", {
- df <- data.frame(letters = letters)
+ df <- data.frame(letters = letters, stringsAsFactors = FALSE)
df_shuffled <- vec_shuffle(df)
expect_setequal(df_shuffled$letters, df$letters)
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 068122f..976b39e 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -420,8 +420,8 @@ static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
if (actual_size < expected_size) {
ArrowErrorSet(
error,
- "Expected buffer %d to size >= %ld bytes but found buffer with %ld
bytes", i,
- (long)expected_size, (long)actual_size);
+ "Expected buffer %d to size >= %ld bytes but found buffer with %ld
bytes",
+ (int)i, (long)expected_size, (long)actual_size);
return EINVAL;
}
}
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 03bc381..1d2a721 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -201,10 +201,10 @@ const char* ArrowErrorMessage(struct ArrowError* error);
/// @{
/// \brief Return a version string in the form "major.minor.patch"
-const char* ArrowNanoarrowVersion();
+const char* ArrowNanoarrowVersion(void);
/// \brief Return an integer that can be used to compare versions sequentially
-int ArrowNanoarrowVersionInt();
+int ArrowNanoarrowVersionInt(void);
/// \brief Initialize a description of buffer arrangements from a storage type
void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
diff --git a/src/nanoarrow/schema.c b/src/nanoarrow/schema.c
index 3ce6518..8b24fbe 100644
--- a/src/nanoarrow/schema.c
+++ b/src/nanoarrow/schema.c
@@ -1211,6 +1211,24 @@ static int64_t ArrowSchemaTypeToStringInternal(struct
ArrowSchemaView* schema_vi
}
}
+// Helper for bookeeping to emulate sprintf()-like behaviour spread
+// among multiple sprintf calls.
+static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last,
+ int64_t* n_remaining, int64_t*
n_chars) {
+ *n_chars += n_chars_last;
+ *n_remaining -= n_chars_last;
+
+ // n_remaining is never less than 0
+ if (*n_remaining < 0) {
+ *n_remaining = 0;
+ }
+
+ // Can't do math on a NULL pointer
+ if (*out != NULL) {
+ *out += n_chars_last;
+ }
+}
+
int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
char recursive) {
if (schema == NULL) {
@@ -1237,90 +1255,59 @@ int64_t ArrowSchemaToString(struct ArrowSchema* schema,
char* out, int64_t n,
// Uncommon but not technically impossible that both are true
if (is_extension && is_dictionary) {
- n_chars_last = snprintf(out + n_chars, n, "%.*s{dictionary(%s)<",
- (int)schema_view.extension_name.size_bytes,
- schema_view.extension_name.data,
- ArrowTypeString(schema_view.storage_type));
+ n_chars_last = snprintf(
+ out, n, "%.*s{dictionary(%s)<",
(int)schema_view.extension_name.size_bytes,
+ schema_view.extension_name.data,
ArrowTypeString(schema_view.storage_type));
} else if (is_extension) {
- n_chars_last =
- snprintf(out + n_chars, n, "%.*s{",
(int)schema_view.extension_name.size_bytes,
- schema_view.extension_name.data);
+ n_chars_last = snprintf(out, n, "%.*s{",
(int)schema_view.extension_name.size_bytes,
+ schema_view.extension_name.data);
} else if (is_dictionary) {
- n_chars_last = snprintf(out + n_chars, n, "dictionary(%s)<",
- ArrowTypeString(schema_view.storage_type));
+ n_chars_last =
+ snprintf(out, n, "dictionary(%s)<",
ArrowTypeString(schema_view.storage_type));
}
- n_chars += n_chars_last;
- n -= n_chars_last;
- if (n < 0) {
- n = 0;
- }
+ ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
if (!is_dictionary) {
- n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out +
n_chars, n);
+ n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n);
} else {
- n_chars_last = ArrowSchemaToString(schema->dictionary, out + n_chars, n,
recursive);
+ n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive);
}
- n_chars += n_chars_last;
- n -= n_chars_last;
- if (n < 0) {
- n = 0;
- }
+ ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
if (recursive && schema->format[0] == '+') {
- n_chars_last = snprintf(out + n_chars, n, "<");
- n_chars += n_chars_last;
- n -= n_chars_last;
- if (n < 0) {
- n = 0;
- }
+ n_chars_last = snprintf(out, n, "<");
+ ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
for (int64_t i = 0; i < schema->n_children; i++) {
if (i > 0) {
- n_chars_last = snprintf(out + n_chars, n, ", ");
- n_chars += n_chars_last;
- n -= n_chars_last;
- if (n < 0) {
- n = 0;
- }
+ n_chars_last = snprintf(out, n, ", ");
+ ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}
// ArrowSchemaToStringInternal() will validate the child and print the
error,
// but we need the name first
if (schema->children[i] != NULL && schema->children[i]->release != NULL
&&
schema->children[i]->name != NULL) {
- n_chars_last = snprintf(out + n_chars, n, "%s: ",
schema->children[i]->name);
- n_chars += n_chars_last;
- n -= n_chars_last;
- if (n < 0) {
- n = 0;
- }
+ n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name);
+ ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}
- n_chars_last =
- ArrowSchemaToString(schema->children[i], out + n_chars, n,
recursive);
- n_chars += n_chars_last;
- n -= n_chars_last;
- if (n < 0) {
- n = 0;
- }
+ n_chars_last = ArrowSchemaToString(schema->children[i], out, n,
recursive);
+ ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}
- n_chars_last = snprintf(out + n_chars, n, ">");
- n_chars += n_chars_last;
- n -= n_chars_last;
- if (n < 0) {
- n = 0;
- }
+ n_chars_last = snprintf(out, n, ">");
+ ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
}
if (is_extension && is_dictionary) {
- n_chars += snprintf(out + n_chars, n, ">}");
+ n_chars += snprintf(out, n, ">}");
} else if (is_extension) {
- n_chars += snprintf(out + n_chars, n, "}");
+ n_chars += snprintf(out, n, "}");
} else if (is_dictionary) {
- n_chars += snprintf(out + n_chars, n, ">");
+ n_chars += snprintf(out, n, ">");
}
return n_chars;
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
index fca0f80..b87c1c3 100644
--- a/src/nanoarrow/utils.c
+++ b/src/nanoarrow/utils.c
@@ -24,9 +24,9 @@
#include "nanoarrow.h"
-const char* ArrowNanoarrowVersion() { return NANOARROW_VERSION; }
+const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; }
-int ArrowNanoarrowVersionInt() { return NANOARROW_VERSION_INT; }
+int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; }
int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
if (error == NULL) {