This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 49e4816 fix: Return `EOVERFLOW` when appending to a string or binary
type would exeed 2 GB (#302)
49e4816 is described below
commit 49e48161226a4019aab57691740ad8efa1c3bbf2
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Sep 21 23:06:48 2023 +0200
fix: Return `EOVERFLOW` when appending to a string or binary type would
exeed 2 GB (#302)
This aligns nanoarrow with the behaviour in the ADBC postgres driver. It
is useful to differentiate this case because frequently a caller will
want to finish the current array and start a new one with the new value.
---
src/nanoarrow/array_inline.h | 6 +++---
src/nanoarrow/array_test.cc | 30 ++++++++++++++++++++++++++++++
src/nanoarrow/nanoarrow.h | 18 +++++++++++-------
3 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 208462c..96fdf57 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -469,8 +469,8 @@ static inline ArrowErrorCode ArrowArrayAppendBytes(struct
ArrowArray* array,
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
offset = ((int32_t*)offset_buffer->data)[array->length];
- if ((offset + value.size_bytes) > INT32_MAX) {
- return EINVAL;
+ if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
+ return EOVERFLOW;
}
offset += (int32_t)value.size_bytes;
@@ -618,7 +618,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct
ArrowArray* array) {
case NANOARROW_TYPE_MAP:
child_length = array->children[0]->length;
if (child_length > INT32_MAX) {
- return EINVAL;
+ return EOVERFLOW;
}
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1),
(int32_t)child_length));
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index ac99ecf..eb34243 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -878,6 +878,19 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeBinaryArray) {
EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
}
+TEST(ArrayTest, ArrayTestAppendToBinaryArrayErrors) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_BINARY),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ struct ArrowBufferView item;
+ item.data.as_char = "";
+ item.size_bytes = static_cast<int64_t>(INT_MAX) + 1;
+ EXPECT_EQ(ArrowArrayAppendBytes(&array, item), EOVERFLOW);
+
+ array.release(&array);
+}
+
TEST(ArrayTest, ArrayTestAppendToIntervalArrayYearMonth) {
struct ArrowArray array;
@@ -1317,6 +1330,23 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeListArray) {
EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
}
+TEST(ArrayTest, ArrayTestAppendToListArrayErrors) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ array.children[0]->length = static_cast<int64_t>(INT32_MAX) + 1;
+ EXPECT_EQ(ArrowArrayFinishElement(&array), EOVERFLOW);
+
+ array.release(&array);
+ schema.release(&schema);
+}
+
TEST(ArrayTest, ArrayTestAppendToStructArray) {
struct ArrowArray array;
struct ArrowSchema schema;
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 6f4d905..11bf6bb 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -863,18 +863,21 @@ static inline ArrowErrorCode
ArrowArrayAppendDouble(struct ArrowArray* array,
/// \brief Append a string of bytes to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
-/// the underlying storage type or EINVAL otherwise (e.g.,
-/// the underlying array is not a binary, string, large binary, large string,
-/// or fixed-size binary array, or value is the wrong size for a fixed-size
-/// binary array).
+/// the underlying storage type, EOVERFLOW if appending value would overflow
+/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
+/// non-large string type), or EINVAL otherwise (e.g., the underlying array is
not a
+/// binary, string, large binary, large string, or fixed-size binary array, or
value is
+/// the wrong size for a fixed-size binary array).
static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
struct ArrowBufferView
value);
/// \brief Append a string value to an array
///
/// Returns NANOARROW_OK if value can be exactly represented by
-/// the underlying storage type or EINVAL otherwise (e.g.,
-/// the underlying array is not a string or large string array).
+/// the underlying storage type, EOVERFLOW if appending value would overflow
+/// the offset type (e.g., if the data buffer would be larger than 2 GB for a
+/// non-large string type), or EINVAL otherwise (e.g., the underlying array is
not a
+/// string or large string array).
static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
struct ArrowStringView
value);
@@ -895,7 +898,8 @@ static inline ArrowErrorCode ArrowArrayAppendDecimal(struct
ArrowArray* array,
/// \brief Finish a nested array element
///
/// Appends a non-null element to the array based on the first child's current
-/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL
+/// length. Returns NANOARROW_OK if the item was successfully added, EOVERFLOW
+/// if the child of a list or map array would exceed INT_MAX elements, or
EINVAL
/// if the underlying storage type is not a struct, list, large list, or
fixed-size
/// list, or if there was an attempt to add a struct or fixed-size list
element where the
/// length of the child array(s) did not match the expected length.