This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new ec495803d fix(c/driver/postgresql): handle overflow on binary-like fields (#3616)
ec495803d is described below
commit ec495803d8deb207b0e7fe07cec627dd5bc90d8d
Author: David Li <[email protected]>
AuthorDate: Mon Oct 27 15:50:06 2025 +0900
fix(c/driver/postgresql): handle overflow on binary-like fields (#3616)
Closes #3611.
---
.../postgresql/copy/postgres_copy_reader_test.cc | 56 ++++++++++++++++++++++
c/driver/postgresql/copy/reader.h | 40 ++++++++++++----
2 files changed, 86 insertions(+), 10 deletions(-)
diff --git a/c/driver/postgresql/copy/postgres_copy_reader_test.cc b/c/driver/postgresql/copy/postgres_copy_reader_test.cc
index e5ba5fc6c..18e487f9c 100644
--- a/c/driver/postgresql/copy/postgres_copy_reader_test.cc
+++ b/c/driver/postgresql/copy/postgres_copy_reader_test.cc
@@ -16,6 +16,7 @@
// under the License.
#include <string>
+#include <vector>
#include <gtest/gtest.h>
#include <nanoarrow/nanoarrow.hpp>
@@ -922,4 +923,59 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) {
ASSERT_DOUBLE_EQ(data_buffer2[2], 0);
}
+TEST(PostgresCopyUtilsTest, BinaryOverflow) {
+ constexpr int32_t field_size_bytes = 1024 * 1024 * 4;
+ std::vector<uint8_t> buffer(field_size_bytes);
+
+ nanoarrow::UniqueSchema schema;
+ ASSERT_EQ(NANOARROW_OK, ArrowSchemaInitFromType(schema.get(),
NANOARROW_TYPE_BINARY));
+
+ nanoarrow::UniqueArray array;
+ ASSERT_EQ(NANOARROW_OK, ArrowArrayInitFromType(array.get(),
NANOARROW_TYPE_BINARY));
+ ASSERT_EQ(NANOARROW_OK, ArrowArrayStartAppending(array.get()));
+
+ PostgresCopyBinaryFieldReader reader;
+ ASSERT_EQ(NANOARROW_OK, reader.InitSchema(schema.get()));
+ ASSERT_EQ(NANOARROW_OK, reader.InitArray(array.get()));
+ for (int i = 0; i < 511; i++) {
+ ArrowBufferView data;
+ data.size_bytes = static_cast<int64_t>(buffer.size());
+ data.data.data = buffer.data();
+ ASSERT_EQ(NANOARROW_OK, reader.Read(&data, field_size_bytes, array.get(),
nullptr));
+ }
+ ArrowBufferView data;
+ data.size_bytes = static_cast<int64_t>(buffer.size());
+ data.data.data = buffer.data();
+ ASSERT_EQ(EOVERFLOW, reader.Read(&data, field_size_bytes, array.get(),
nullptr));
+}
+
+TEST(PostgresCopyUtilsTest, JsonbOverflow) {
+ constexpr int32_t field_size_bytes = 1024 * 1024 * 4;
+ std::vector<uint8_t> buffer(field_size_bytes, 1);
+
+ nanoarrow::UniqueSchema schema;
+ ASSERT_EQ(NANOARROW_OK, ArrowSchemaInitFromType(schema.get(),
NANOARROW_TYPE_BINARY));
+
+ nanoarrow::UniqueArray array;
+ ASSERT_EQ(NANOARROW_OK, ArrowArrayInitFromType(array.get(),
NANOARROW_TYPE_BINARY));
+ ASSERT_EQ(NANOARROW_OK, ArrowArrayStartAppending(array.get()));
+
+ PostgresCopyJsonbFieldReader reader;
+ ASSERT_EQ(NANOARROW_OK, reader.InitSchema(schema.get()));
+ ASSERT_EQ(NANOARROW_OK, reader.InitArray(array.get()));
+ for (int i = 0; i < 512; i++) {
+ ArrowBufferView data;
+ data.size_bytes = static_cast<int64_t>(buffer.size());
+ data.data.data = buffer.data();
+ ASSERT_EQ(NANOARROW_OK, reader.Read(&data, field_size_bytes, array.get(),
nullptr));
+ }
+ ArrowBufferView data;
+ data.size_bytes = static_cast<int64_t>(buffer.size());
+ data.data.data = buffer.data();
+ ASSERT_EQ(EOVERFLOW, reader.Read(&data, field_size_bytes, array.get(),
nullptr));
+}
+
+// N.B. unfortunately testing with NUMERIC is difficult since we can't add all
+// that much to the buffer on each iteration, so the unit test takes too long
+
} // namespace adbcpq
diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h
index d14f8752f..df9209a34 100644
--- a/c/driver/postgresql/copy/reader.h
+++ b/c/driver/postgresql/copy/reader.h
@@ -19,11 +19,13 @@
#include <algorithm>
#include <cinttypes>
+#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
+#include <portable-snippets/safe-math.h>
#include <nanoarrow/nanoarrow.hpp>
#include "../postgres_type.h"
@@ -315,9 +317,15 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader {
}
if (!special_value.empty()) {
+ if ((data_->size_bytes + static_cast<int64_t>(special_value.size())) >
+ static_cast<int64_t>((std::numeric_limits<int32_t>::max)())) {
+ return EOVERFLOW;
+ }
+
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppend(data_, special_value.data(),
special_value.size()));
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_,
data_->size_bytes));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendInt32(offsets_,
static_cast<int32_t>(data_->size_bytes)));
return AppendValid(array);
}
@@ -392,8 +400,13 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader {
}
// Update data buffer size and add offsets
+ if ((data_->size_bytes + actual_chars_required) >
+ static_cast<int64_t>((std::numeric_limits<int32_t>::max)())) {
+ return EOVERFLOW;
+ }
data_->size_bytes += actual_chars_required;
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_,
data_->size_bytes));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendInt32(offsets_,
static_cast<int32_t>(data_->size_bytes)));
return AppendValid(array);
}
@@ -431,14 +444,17 @@ class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader {
return EINVAL;
}
+ int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
+ int32_t next_offset = 0;
+ if (!psnip_safe_int32_add(&next_offset, offsets[array->length],
field_size_bytes)) {
+ return EOVERFLOW;
+ }
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, next_offset));
+
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data,
field_size_bytes));
data->data.as_uint8 += field_size_bytes;
data->size_bytes -= field_size_bytes;
- int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppendInt32(offsets_, offsets[array->length] +
field_size_bytes));
-
return AppendValid(array);
}
};
@@ -472,14 +488,18 @@ class PostgresCopyJsonbFieldReader : public PostgresCopyFieldReader {
}
field_size_bytes -= 1;
+
+ int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
+ int32_t next_offset = 0;
+ if (!psnip_safe_int32_add(&next_offset, offsets[array->length],
field_size_bytes)) {
+ return EOVERFLOW;
+ }
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, next_offset));
+
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data,
field_size_bytes));
data->data.as_uint8 += field_size_bytes;
data->size_bytes -= field_size_bytes;
- int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppendInt32(offsets_, offsets[array->length] +
field_size_bytes));
-
return AppendValid(array);
}
};