This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 49e4816  fix: Return `EOVERFLOW` when appending to a string or binary 
type would exceed 2 GB (#302)
49e4816 is described below

commit 49e48161226a4019aab57691740ad8efa1c3bbf2
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Sep 21 23:06:48 2023 +0200

    fix: Return `EOVERFLOW` when appending to a string or binary type would 
exceed 2 GB (#302)
    
    This aligns nanoarrow with the behaviour in the ADBC postgres driver. It
    is useful to differentiate this case because frequently a caller will
    want to finish the current array and start a new one with the new value.
---
 src/nanoarrow/array_inline.h |  6 +++---
 src/nanoarrow/array_test.cc  | 30 ++++++++++++++++++++++++++++++
 src/nanoarrow/nanoarrow.h    | 18 +++++++++++-------
 3 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 208462c..96fdf57 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -469,8 +469,8 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct 
ArrowArray* array,
     case NANOARROW_TYPE_STRING:
     case NANOARROW_TYPE_BINARY:
       offset = ((int32_t*)offset_buffer->data)[array->length];
-      if ((offset + value.size_bytes) > INT32_MAX) {
-        return EINVAL;
+      if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
+        return EOVERFLOW;
       }
 
       offset += (int32_t)value.size_bytes;
@@ -618,7 +618,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct 
ArrowArray* array) {
     case NANOARROW_TYPE_MAP:
       child_length = array->children[0]->length;
       if (child_length > INT32_MAX) {
-        return EINVAL;
+        return EOVERFLOW;
       }
       NANOARROW_RETURN_NOT_OK(
           ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), 
(int32_t)child_length));
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index ac99ecf..eb34243 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -878,6 +878,19 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeBinaryArray) {
   EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
 }
 
+TEST(ArrayTest, ArrayTestAppendToBinaryArrayErrors) {
+  struct ArrowArray array;
+
+  ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_BINARY), 
NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+  struct ArrowBufferView item;
+  item.data.as_char = "";
+  item.size_bytes = static_cast<int64_t>(INT_MAX) + 1;
+  EXPECT_EQ(ArrowArrayAppendBytes(&array, item), EOVERFLOW);
+
+  array.release(&array);
+}
+
 TEST(ArrayTest, ArrayTestAppendToIntervalArrayYearMonth) {
   struct ArrowArray array;
 
@@ -1317,6 +1330,23 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeListArray) {
   EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
 }
 
+TEST(ArrayTest, ArrayTestAppendToListArrayErrors) {
+  struct ArrowArray array;
+  struct ArrowSchema schema;
+  struct ArrowError error;
+
+  ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST), 
NANOARROW_OK);
+  ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), 
NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+  array.children[0]->length = static_cast<int64_t>(INT32_MAX) + 1;
+  EXPECT_EQ(ArrowArrayFinishElement(&array), EOVERFLOW);
+
+  array.release(&array);
+  schema.release(&schema);
+}
+
 TEST(ArrayTest, ArrayTestAppendToStructArray) {
   struct ArrowArray array;
   struct ArrowSchema schema;
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 6f4d905..11bf6bb 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -863,18 +863,21 @@ static inline ArrowErrorCode 
ArrowArrayAppendDouble(struct ArrowArray* array,
 /// \brief Append a string of bytes to an array
 ///
 /// Returns NANOARROW_OK if value can be exactly represented by
-/// the underlying storage type or EINVAL otherwise (e.g.,
-/// the underlying array is not a binary, string, large binary, large string,
-/// or fixed-size binary array, or value is the wrong size for a fixed-size
-/// binary array).
+/// the underlying storage type, EOVERFLOW if appending value would overflow
+/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
+/// non-large string type), or EINVAL otherwise (e.g., the underlying array is 
not a
+/// binary, string, large binary, large string, or fixed-size binary array, or 
value is
+/// the wrong size for a fixed-size binary array).
 static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
                                                    struct ArrowBufferView 
value);
 
 /// \brief Append a string value to an array
 ///
 /// Returns NANOARROW_OK if value can be exactly represented by
-/// the underlying storage type or EINVAL otherwise (e.g.,
-/// the underlying array is not a string or large string array).
+/// the underlying storage type, EOVERFLOW if appending value would overflow
+/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
+/// non-large string type), or EINVAL otherwise (e.g., the underlying array is 
not a
+/// string or large string array).
 static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
                                                     struct ArrowStringView 
value);
 
@@ -895,7 +898,8 @@ static inline ArrowErrorCode ArrowArrayAppendDecimal(struct 
ArrowArray* array,
 /// \brief Finish a nested array element
 ///
 /// Appends a non-null element to the array based on the first child's current
-/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL
+/// length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW
+/// if the child of a list or map array would exceed INT32_MAX elements, or 
EINVAL
 /// if the underlying storage type is not a struct, list, large list, or 
fixed-size
 /// list, or if there was an attempt to add a struct or fixed-size list 
element where the
 /// length of the child array(s) did not match the expected length.

Reply via email to