[arrow-nanoarrow] branch main updated: fix: Explicit `stringsAsFactors = FALSE` for R <= 3.6 (#135)

paleolimbot Wed, 01 Mar 2023 05:45:34 -0800

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 9447c63  fix: Explicit `stringsAsFactors = FALSE` for R <= 3.6 (#135)
9447c63 is described below

commit 9447c63715d4ec371b81d6bcb5d34b1a869efd2c
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Mar 1 09:45:24 2023 -0400

    fix: Explicit `stringsAsFactors = FALSE` for R <= 3.6 (#135)
    
    Closes #131.
---
 .github/workflows/r-check.yaml               | 10 ++-
 r/DESCRIPTION                                |  2 +-
 r/tests/testthat/test-altrep.R               |  2 +-
 r/tests/testthat/test-array.R                |  4 +-
 r/tests/testthat/test-convert-array-stream.R |  5 +-
 r/tests/testthat/test-convert-array.R        | 17 +++--
 r/tests/testthat/test-infer-ptype.R          |  6 +-
 r/tests/testthat/test-util.R                 | 10 ++-
 src/nanoarrow/array.c                        |  4 +-
 src/nanoarrow/nanoarrow.h                    |  4 +-
 src/nanoarrow/schema.c                       | 97 ++++++++++++----------------
 src/nanoarrow/utils.c                        |  4 +-
 12 files changed, 87 insertions(+), 78 deletions(-)

diff --git a/.github/workflows/r-check.yaml b/.github/workflows/r-check.yaml
index 3d3ea01..85ba356 100644
--- a/.github/workflows/r-check.yaml
+++ b/.github/workflows/r-check.yaml
@@ -19,9 +19,11 @@ name: test-r
 
 on:
   push:
-    branches: main
+    branches:
+      - main
   pull_request:
-    branches: main
+    branches:
+      - main
     paths:
       - '.github/workflows/r-check.yaml'
       - 'src/nanoarrow/**'
@@ -38,6 +40,7 @@ jobs:
       matrix:
         config:
           - {os: macOS-latest,   r: 'release'}
+          - {os: windows-latest, r: '3.6'}
           - {os: windows-latest, r: 'release'}
           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
           - {os: ubuntu-latest,   r: 'release'}
@@ -70,13 +73,14 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::rcmdcheck
+          extra-packages: any::rcmdcheck, arrow=?ignore-before-r=4.0.0
           needs: check
           working-directory: r
 
       - uses: r-lib/actions/check-r-package@v2
         env:
           ARROW_R_VERBOSE_TEST: "true"
+          _R_CHECK_FORCE_SUGGESTS_: false
         with:
           upload-snapshots: true
           working-directory: r
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index 5707ee0..67cc9aa 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -22,7 +22,7 @@ RoxygenNote: 7.2.3
 URL: https://github.com/apache/arrow-nanoarrow
 BugReports: https://github.com/apache/arrow-nanoarrow/issues
 Suggests: 
-    arrow (>= 10.0.0),
+    arrow (>= 9.0.0),
     blob,
     hms,
     rlang,
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
index 821fc65..0658c76 100644
--- a/r/tests/testthat/test-altrep.R
+++ b/r/tests/testthat/test-altrep.R
@@ -72,7 +72,7 @@ test_that("nanoarrow_altrep_chr_force_materialize() forces 
materialization", {
   expect_identical(nanoarrow_altrep_force_materialize(x_altrep), 1L)
 
   x <- as_nanoarrow_array(letters, schema = na_string())
-  x_altrep_df <- data.frame(x = nanoarrow_altrep_chr(x))
+  x_altrep_df <- data.frame(x = nanoarrow_altrep_chr(x), stringsAsFactors = 
FALSE)
   expect_identical(
     nanoarrow_altrep_force_materialize(x_altrep_df, recursive = FALSE),
     0L
diff --git a/r/tests/testthat/test-array.R b/r/tests/testthat/test-array.R
index 8b23ce7..7b9c113 100644
--- a/r/tests/testthat/test-array.R
+++ b/r/tests/testthat/test-array.R
@@ -100,7 +100,7 @@ test_that("schemaless array list interface works for 
non-nested types", {
 })
 
 test_that("schemaless array list interface works for nested types", {
-  array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+  array <- as_nanoarrow_array(data.frame(a = 1L, b = "two", stringsAsFactors = 
FALSE))
   nanoarrow_array_set_schema(array, NULL)
 
   expect_length(array$children, 2L)
@@ -203,7 +203,7 @@ test_that("array list interface classes offset buffers for 
relevant types", {
 })
 
 test_that("array list interface works for nested types", {
-  array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+  array <- as_nanoarrow_array(data.frame(a = 1L, b = "two", stringsAsFactors = 
FALSE))
 
   expect_named(array$children, c("a", "b"))
   expect_s3_class(array$children[[1]], "nanoarrow_array")
diff --git a/r/tests/testthat/test-convert-array-stream.R 
b/r/tests/testthat/test-convert-array-stream.R
index 404a43f..9e07124 100644
--- a/r/tests/testthat/test-convert-array-stream.R
+++ b/r/tests/testthat/test-convert-array-stream.R
@@ -99,7 +99,10 @@ test_that("convert array stream works for nested 
data.frames", {
 })
 
 test_that("convert array stream works for struct-style vectors", {
-  raw_posixlt <- as.data.frame(unclass(as.POSIXlt("2021-01-01", tz = 
"America/Halifax")))
+  raw_posixlt <- as.data.frame(
+    unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")),
+    stringsAsFactors = FALSE
+  )
 
   stream <- as_nanoarrow_array_stream(raw_posixlt)
   expect_identical(
diff --git a/r/tests/testthat/test-convert-array.R 
b/r/tests/testthat/test-convert-array.R
index 2f21ad1..4175c10 100644
--- a/r/tests/testthat/test-convert-array.R
+++ b/r/tests/testthat/test-convert-array.R
@@ -61,7 +61,7 @@ test_that("convert_array() errors for unsupported array", {
 })
 
 test_that("convert to vector works for data.frame", {
-  df <- data.frame(a = 1L, b = "two", c = 3, d = TRUE)
+  df <- data.frame(a = 1L, b = "two", c = 3, d = TRUE, stringsAsFactors = 
FALSE)
   array <- as_nanoarrow_array(df)
 
   expect_identical(convert_array(array, NULL), df)
@@ -80,10 +80,12 @@ test_that("convert to vector works for data.frame", {
 })
 
 test_that("convert to vector works for partial_frame", {
-  array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+  array <- as_nanoarrow_array(
+    data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
+  )
   expect_identical(
     convert_array(array, vctrs::partial_frame()),
-    data.frame(a = 1L, b = "two")
+    data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
   )
 })
 
@@ -108,7 +110,9 @@ test_that("convert to vector works for function()", {
 })
 
 test_that("convert to vector works for tibble", {
-  array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+  array <- as_nanoarrow_array(
+    data.frame(a = 1L, b = "two", stringsAsFactors = FALSE)
+  )
   expect_identical(
     convert_array(array, tibble::tibble(a = integer(), b = character())),
     tibble::tibble(a = 1L, b = "two")
@@ -135,7 +139,10 @@ test_that("convert to vector works for struct-style 
vectors", {
   array <- as_nanoarrow_array(as.POSIXlt("2021-01-01", tz = "America/Halifax"))
   expect_identical(
     convert_array(array),
-    as.data.frame(unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")))
+    as.data.frame(
+      unclass(as.POSIXlt("2021-01-01", tz = "America/Halifax")),
+      stringsAsFactors = FALSE
+    )
   )
 
   array <- as_nanoarrow_array(as.POSIXlt("2021-01-01", tz = "America/Halifax"))
diff --git a/r/tests/testthat/test-infer-ptype.R 
b/r/tests/testthat/test-infer-ptype.R
index 2fe3c96..920d9d8 100644
--- a/r/tests/testthat/test-infer-ptype.R
+++ b/r/tests/testthat/test-infer-ptype.R
@@ -63,8 +63,10 @@ test_that("infer_nanoarrow_ptype() works for basic types", {
   )
 
   expect_identical(
-    infer_nanoarrow_ptype(as_nanoarrow_array(data.frame(x = character()))),
-    data.frame(x = character())
+    infer_nanoarrow_ptype(
+      as_nanoarrow_array(data.frame(x = character(), stringsAsFactors = FALSE))
+    ),
+    data.frame(x = character(), stringsAsFactors = FALSE)
   )
 })
 
diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R
index 93b313b..c1bdc8e 100644
--- a/r/tests/testthat/test-util.R
+++ b/r/tests/testthat/test-util.R
@@ -56,7 +56,13 @@ test_that("new_data_frame() works", {
 })
 
 test_that("vector fuzzers work", {
-  ptype <- data.frame(a = logical(), b = integer(), c = double(), d = 
character())
+  ptype <- data.frame(
+    a = logical(),
+    b = integer(),
+    c = double(),
+    d = character(),
+    stringsAsFactors = FALSE
+  )
   df_gen <- vec_gen(ptype, n = 123)
 
   expect_identical(nrow(df_gen), 123L)
@@ -66,7 +72,7 @@ test_that("vector fuzzers work", {
 })
 
 test_that("vector shuffler works", {
-  df <- data.frame(letters = letters)
+  df <- data.frame(letters = letters, stringsAsFactors = FALSE)
   df_shuffled <- vec_shuffle(df)
   expect_setequal(df_shuffled$letters, df$letters)
 
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 068122f..976b39e 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -420,8 +420,8 @@ static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
     if (actual_size < expected_size) {
       ArrowErrorSet(
           error,
-          "Expected buffer %d to size >= %ld bytes but found buffer with %ld 
bytes", i,
-          (long)expected_size, (long)actual_size);
+          "Expected buffer %d to size >= %ld bytes but found buffer with %ld 
bytes",
+          (int)i, (long)expected_size, (long)actual_size);
       return EINVAL;
     }
   }
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 03bc381..1d2a721 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -201,10 +201,10 @@ const char* ArrowErrorMessage(struct ArrowError* error);
 /// @{
 
 /// \brief Return a version string in the form "major.minor.patch"
-const char* ArrowNanoarrowVersion();
+const char* ArrowNanoarrowVersion(void);
 
 /// \brief Return an integer that can be used to compare versions sequentially
-int ArrowNanoarrowVersionInt();
+int ArrowNanoarrowVersionInt(void);
 
 /// \brief Initialize a description of buffer arrangements from a storage type
 void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type);
diff --git a/src/nanoarrow/schema.c b/src/nanoarrow/schema.c
index 3ce6518..8b24fbe 100644
--- a/src/nanoarrow/schema.c
+++ b/src/nanoarrow/schema.c
@@ -1211,6 +1211,24 @@ static int64_t ArrowSchemaTypeToStringInternal(struct 
ArrowSchemaView* schema_vi
   }
 }
 
+// Helper for bookeeping to emulate sprintf()-like behaviour spread
+// among multiple sprintf calls.
+static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last,
+                                         int64_t* n_remaining, int64_t* 
n_chars) {
+  *n_chars += n_chars_last;
+  *n_remaining -= n_chars_last;
+
+  // n_remaining is never less than 0
+  if (*n_remaining < 0) {
+    *n_remaining = 0;
+  }
+
+  // Can't do math on a NULL pointer
+  if (*out != NULL) {
+    *out += n_chars_last;
+  }
+}
+
 int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
                             char recursive) {
   if (schema == NULL) {
@@ -1237,90 +1255,59 @@ int64_t ArrowSchemaToString(struct ArrowSchema* schema, 
char* out, int64_t n,
 
   // Uncommon but not technically impossible that both are true
   if (is_extension && is_dictionary) {
-    n_chars_last = snprintf(out + n_chars, n, "%.*s{dictionary(%s)<",
-                            (int)schema_view.extension_name.size_bytes,
-                            schema_view.extension_name.data,
-                            ArrowTypeString(schema_view.storage_type));
+    n_chars_last = snprintf(
+        out, n, "%.*s{dictionary(%s)<", 
(int)schema_view.extension_name.size_bytes,
+        schema_view.extension_name.data, 
ArrowTypeString(schema_view.storage_type));
   } else if (is_extension) {
-    n_chars_last =
-        snprintf(out + n_chars, n, "%.*s{", 
(int)schema_view.extension_name.size_bytes,
-                 schema_view.extension_name.data);
+    n_chars_last = snprintf(out, n, "%.*s{", 
(int)schema_view.extension_name.size_bytes,
+                            schema_view.extension_name.data);
   } else if (is_dictionary) {
-    n_chars_last = snprintf(out + n_chars, n, "dictionary(%s)<",
-                            ArrowTypeString(schema_view.storage_type));
+    n_chars_last =
+        snprintf(out, n, "dictionary(%s)<", 
ArrowTypeString(schema_view.storage_type));
   }
 
-  n_chars += n_chars_last;
-  n -= n_chars_last;
-  if (n < 0) {
-    n = 0;
-  }
+  ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
 
   if (!is_dictionary) {
-    n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out + 
n_chars, n);
+    n_chars_last = ArrowSchemaTypeToStringInternal(&schema_view, out, n);
   } else {
-    n_chars_last = ArrowSchemaToString(schema->dictionary, out + n_chars, n, 
recursive);
+    n_chars_last = ArrowSchemaToString(schema->dictionary, out, n, recursive);
   }
 
-  n_chars += n_chars_last;
-  n -= n_chars_last;
-  if (n < 0) {
-    n = 0;
-  }
+  ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
 
   if (recursive && schema->format[0] == '+') {
-    n_chars_last = snprintf(out + n_chars, n, "<");
-    n_chars += n_chars_last;
-    n -= n_chars_last;
-    if (n < 0) {
-      n = 0;
-    }
+    n_chars_last = snprintf(out, n, "<");
+    ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
 
     for (int64_t i = 0; i < schema->n_children; i++) {
       if (i > 0) {
-        n_chars_last = snprintf(out + n_chars, n, ", ");
-        n_chars += n_chars_last;
-        n -= n_chars_last;
-        if (n < 0) {
-          n = 0;
-        }
+        n_chars_last = snprintf(out, n, ", ");
+        ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
       }
 
       // ArrowSchemaToStringInternal() will validate the child and print the 
error,
       // but we need the name first
       if (schema->children[i] != NULL && schema->children[i]->release != NULL 
&&
           schema->children[i]->name != NULL) {
-        n_chars_last = snprintf(out + n_chars, n, "%s: ", 
schema->children[i]->name);
-        n_chars += n_chars_last;
-        n -= n_chars_last;
-        if (n < 0) {
-          n = 0;
-        }
+        n_chars_last = snprintf(out, n, "%s: ", schema->children[i]->name);
+        ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
       }
 
-      n_chars_last =
-          ArrowSchemaToString(schema->children[i], out + n_chars, n, 
recursive);
-      n_chars += n_chars_last;
-      n -= n_chars_last;
-      if (n < 0) {
-        n = 0;
-      }
+      n_chars_last = ArrowSchemaToString(schema->children[i], out, n, 
recursive);
+      ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
     }
 
-    n_chars_last = snprintf(out + n_chars, n, ">");
-    n_chars += n_chars_last;
-    n -= n_chars_last;
-    if (n < 0) {
-      n = 0;
-    }
+    n_chars_last = snprintf(out, n, ">");
+    ArrowToStringLogChars(&out, n_chars_last, &n, &n_chars);
   }
 
   if (is_extension && is_dictionary) {
-    n_chars += snprintf(out + n_chars, n, ">}");
+    n_chars += snprintf(out, n, ">}");
   } else if (is_extension) {
-    n_chars += snprintf(out + n_chars, n, "}");
+    n_chars += snprintf(out, n, "}");
   } else if (is_dictionary) {
-    n_chars += snprintf(out + n_chars, n, ">");
+    n_chars += snprintf(out, n, ">");
   }
 
   return n_chars;
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
index fca0f80..b87c1c3 100644
--- a/src/nanoarrow/utils.c
+++ b/src/nanoarrow/utils.c
@@ -24,9 +24,9 @@
 
 #include "nanoarrow.h"
 
-const char* ArrowNanoarrowVersion() { return NANOARROW_VERSION; }
+const char* ArrowNanoarrowVersion(void) { return NANOARROW_VERSION; }
 
-int ArrowNanoarrowVersionInt() { return NANOARROW_VERSION_INT; }
+int ArrowNanoarrowVersionInt(void) { return NANOARROW_VERSION_INT; }
 
 int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
   if (error == NULL) {

[arrow-nanoarrow] branch main updated: fix: Explicit `stringsAsFactors = FALSE` for R <= 3.6 (#135)

Reply via email to