This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new ec495803d fix(c/driver/postgresql): handle overflow on binary-like fields (#3616)
ec495803d is described below

commit ec495803d8deb207b0e7fe07cec627dd5bc90d8d
Author: David Li <[email protected]>
AuthorDate: Mon Oct 27 15:50:06 2025 +0900

    fix(c/driver/postgresql): handle overflow on binary-like fields (#3616)
    
    Closes #3611.
---
 .../postgresql/copy/postgres_copy_reader_test.cc   | 56 ++++++++++++++++++++++
 c/driver/postgresql/copy/reader.h                  | 40 ++++++++++++----
 2 files changed, 86 insertions(+), 10 deletions(-)

diff --git a/c/driver/postgresql/copy/postgres_copy_reader_test.cc b/c/driver/postgresql/copy/postgres_copy_reader_test.cc
index e5ba5fc6c..18e487f9c 100644
--- a/c/driver/postgresql/copy/postgres_copy_reader_test.cc
+++ b/c/driver/postgresql/copy/postgres_copy_reader_test.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <string>
+#include <vector>
 
 #include <gtest/gtest.h>
 #include <nanoarrow/nanoarrow.hpp>
@@ -922,4 +923,59 @@ TEST(PostgresCopyUtilsTest, PostgresCopyReadCustomRecord) {
   ASSERT_DOUBLE_EQ(data_buffer2[2], 0);
 }
 
+TEST(PostgresCopyUtilsTest, BinaryOverflow) {
+  constexpr int32_t field_size_bytes = 1024 * 1024 * 4;
+  std::vector<uint8_t> buffer(field_size_bytes);
+
+  nanoarrow::UniqueSchema schema;
+  ASSERT_EQ(NANOARROW_OK, ArrowSchemaInitFromType(schema.get(), NANOARROW_TYPE_BINARY));
+
+  nanoarrow::UniqueArray array;
+  ASSERT_EQ(NANOARROW_OK, ArrowArrayInitFromType(array.get(), NANOARROW_TYPE_BINARY));
+  ASSERT_EQ(NANOARROW_OK, ArrowArrayStartAppending(array.get()));
+
+  PostgresCopyBinaryFieldReader reader;
+  ASSERT_EQ(NANOARROW_OK, reader.InitSchema(schema.get()));
+  ASSERT_EQ(NANOARROW_OK, reader.InitArray(array.get()));
+  for (int i = 0; i < 511; i++) {
+    ArrowBufferView data;
+    data.size_bytes = static_cast<int64_t>(buffer.size());
+    data.data.data = buffer.data();
+    ASSERT_EQ(NANOARROW_OK, reader.Read(&data, field_size_bytes, array.get(), nullptr));
+  }
+  ArrowBufferView data;
+  data.size_bytes = static_cast<int64_t>(buffer.size());
+  data.data.data = buffer.data();
+  ASSERT_EQ(EOVERFLOW, reader.Read(&data, field_size_bytes, array.get(), nullptr));
+}
+
+TEST(PostgresCopyUtilsTest, JsonbOverflow) {
+  constexpr int32_t field_size_bytes = 1024 * 1024 * 4;
+  std::vector<uint8_t> buffer(field_size_bytes, 1);
+
+  nanoarrow::UniqueSchema schema;
+  ASSERT_EQ(NANOARROW_OK, ArrowSchemaInitFromType(schema.get(), NANOARROW_TYPE_BINARY));
+
+  nanoarrow::UniqueArray array;
+  ASSERT_EQ(NANOARROW_OK, ArrowArrayInitFromType(array.get(), NANOARROW_TYPE_BINARY));
+  ASSERT_EQ(NANOARROW_OK, ArrowArrayStartAppending(array.get()));
+
+  PostgresCopyJsonbFieldReader reader;
+  ASSERT_EQ(NANOARROW_OK, reader.InitSchema(schema.get()));
+  ASSERT_EQ(NANOARROW_OK, reader.InitArray(array.get()));
+  for (int i = 0; i < 512; i++) {
+    ArrowBufferView data;
+    data.size_bytes = static_cast<int64_t>(buffer.size());
+    data.data.data = buffer.data();
+    ASSERT_EQ(NANOARROW_OK, reader.Read(&data, field_size_bytes, array.get(), nullptr));
+  }
+  ArrowBufferView data;
+  data.size_bytes = static_cast<int64_t>(buffer.size());
+  data.data.data = buffer.data();
+  ASSERT_EQ(EOVERFLOW, reader.Read(&data, field_size_bytes, array.get(), nullptr));
+}
+
+// N.B. unfortunately testing with NUMERIC is difficult since we can't add all
+// that much to the buffer on each iteration, so the unit test takes too long
+
 }  // namespace adbcpq
diff --git a/c/driver/postgresql/copy/reader.h b/c/driver/postgresql/copy/reader.h
index d14f8752f..df9209a34 100644
--- a/c/driver/postgresql/copy/reader.h
+++ b/c/driver/postgresql/copy/reader.h
@@ -19,11 +19,13 @@
 
 #include <algorithm>
 #include <cinttypes>
+#include <limits>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 
+#include <portable-snippets/safe-math.h>
 #include <nanoarrow/nanoarrow.hpp>
 
 #include "../postgres_type.h"
@@ -315,9 +317,15 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader {
     }
 
     if (!special_value.empty()) {
+      if ((data_->size_bytes + static_cast<int64_t>(special_value.size())) >
+          static_cast<int64_t>((std::numeric_limits<int32_t>::max)())) {
+        return EOVERFLOW;
+      }
+
       NANOARROW_RETURN_NOT_OK(
           ArrowBufferAppend(data_, special_value.data(), special_value.size()));
-      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, data_->size_bytes));
+      NANOARROW_RETURN_NOT_OK(
+          ArrowBufferAppendInt32(offsets_, static_cast<int32_t>(data_->size_bytes)));
       return AppendValid(array);
     }
 
@@ -392,8 +400,13 @@ class PostgresCopyNumericFieldReader : public PostgresCopyFieldReader {
     }
 
     // Update data buffer size and add offsets
+    if ((data_->size_bytes + actual_chars_required) >
+        static_cast<int64_t>((std::numeric_limits<int32_t>::max)())) {
+      return EOVERFLOW;
+    }
     data_->size_bytes += actual_chars_required;
-    NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, data_->size_bytes));
+    NANOARROW_RETURN_NOT_OK(
+        ArrowBufferAppendInt32(offsets_, static_cast<int32_t>(data_->size_bytes)));
     return AppendValid(array);
   }
 
@@ -431,14 +444,17 @@ class PostgresCopyBinaryFieldReader : public PostgresCopyFieldReader {
       return EINVAL;
     }
 
+    int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
+    int32_t next_offset = 0;
+    if (!psnip_safe_int32_add(&next_offset, offsets[array->length], field_size_bytes)) {
+      return EOVERFLOW;
+    }
+    NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, next_offset));
+
     NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data, field_size_bytes));
     data->data.as_uint8 += field_size_bytes;
     data->size_bytes -= field_size_bytes;
 
-    int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
-    NANOARROW_RETURN_NOT_OK(
-        ArrowBufferAppendInt32(offsets_, offsets[array->length] + field_size_bytes));
-
     return AppendValid(array);
   }
 };
@@ -472,14 +488,18 @@ class PostgresCopyJsonbFieldReader : public PostgresCopyFieldReader {
     }
 
     field_size_bytes -= 1;
+
+    int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
+    int32_t next_offset = 0;
+    if (!psnip_safe_int32_add(&next_offset, offsets[array->length], field_size_bytes)) {
+      return EOVERFLOW;
+    }
+    NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(offsets_, next_offset));
+
     NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, data->data.data, field_size_bytes));
     data->data.as_uint8 += field_size_bytes;
     data->size_bytes -= field_size_bytes;
 
-    int32_t* offsets = reinterpret_cast<int32_t*>(offsets_->data);
-    NANOARROW_RETURN_NOT_OK(
-        ArrowBufferAppendInt32(offsets_, offsets[array->length] + field_size_bytes));
-
     return AppendValid(array);
   }
 };

Reply via email to