This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 11205f14f feat(c/driver/postgresql): improve conversion of decimal to
numeric (#3787)
11205f14f is described below
commit 11205f14f0440d04fc98bcdd4829acc80333a323
Author: Mandukhai Alimaa <[email protected]>
AuthorDate: Wed Jan 21 19:49:33 2026 -0600
feat(c/driver/postgresql): improve conversion of decimal to numeric (#3787)
The logic for converting the Arrow Decimal type to PostgreSQL NUMERIC has
been refactored to fix data being inserted incorrectly when the scale
is not a multiple of 4.
Adds new test cases covering various scales and zero padding scenarios.
Closes #3485.
---------
Co-authored-by: David Li <[email protected]>
---
.../postgresql/copy/postgres_copy_writer_test.cc | 584 ++++++++++++++++++++-
c/driver/postgresql/copy/writer.h | 282 ++++++++--
.../validation/queries/ingest/decimal.toml | 19 -
.../validation/queries/ingest/decimal.txtcase | 45 ++
.../validation/queries/type/bind/decimal.toml | 19 -
.../validation/queries/type/bind/decimal.txtcase | 32 ++
6 files changed, 872 insertions(+), 109 deletions(-)
diff --git a/c/driver/postgresql/copy/postgres_copy_writer_test.cc
b/c/driver/postgresql/copy/postgres_copy_writer_test.cc
index cd8cb3008..f38bb686c 100644
--- a/c/driver/postgresql/copy/postgres_copy_writer_test.cc
+++ b/c/driver/postgresql/copy/postgres_copy_writer_test.cc
@@ -435,18 +435,32 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteTime) {
// This buffer is similar to the read variant above but removes special values
// nan, ±inf as they are not supported via the Arrow Decimal types
-// COPY (SELECT CAST(col AS NUMERIC) AS col FROM ( VALUES (NULL), (-123.456),
-// ('0.00001234'), (1.0000), (123.456), (1000000)) AS drvd(col))
+// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES
+// (NULL), (999999999999999999999999999999.99999999),
+// (-999999999999999999999999999999.99999999),
+// (0), (1234), (92233720368.54775807), (-92233720368.54775808),
+// (-123.456), ('0.00001234'), (1), (123.456), (1000000)) AS drvd(col))
// TO STDOUT WITH (FORMAT binary);
static uint8_t kTestPgCopyNumericWrite[] = {
0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00,
0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0x00,
0x01, 0x00,
- 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x40, 0x00, 0x00, 0x03, 0x00,
0x7b, 0x11,
- 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00,
0x00, 0x00,
- 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00,
0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00,
0x02, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00,
0x00, 0x00,
- 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff,
0xff};
+ 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x00, 0x00, 0x08, 0x00,
0x63, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00,
0x07, 0x40,
+ 0x00, 0x00, 0x08, 0x00, 0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xd2, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x03,
0x9a, 0x0d,
+ 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, 0xaf, 0x00, 0x01, 0x00, 0x00, 0x00,
0x12, 0x00,
+ 0x05, 0x00, 0x02, 0x40, 0x00, 0x00, 0x08, 0x03, 0x9a, 0x0d, 0x2c, 0x01,
0x70, 0x15,
+ 0x65, 0x16, 0xb0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00,
0x00, 0x40,
+ 0x00, 0x00, 0x03, 0x00, 0x7b, 0x11, 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0a, 0x00,
+ 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x08, 0x04, 0xd2, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00,
0x7b, 0x11,
+ 0xd0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x64, 0xff, 0xff};
TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) {
adbc_validation::Handle<struct ArrowSchema> schema;
@@ -462,20 +476,52 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) {
struct ArrowDecimal decimal3;
struct ArrowDecimal decimal4;
struct ArrowDecimal decimal5;
-
- ArrowDecimalInit(&decimal1, size, 19, 8);
+ struct ArrowDecimal decimal_max_64;
+ struct ArrowDecimal decimal_min_64;
+ struct ArrowDecimal decimal_zero;
+ struct ArrowDecimal decimal_no_frac;
+ struct ArrowDecimal decimal_max_128;
+ struct ArrowDecimal decimal_min_128;
+
+ ArrowDecimalInit(&decimal1, size, precision, scale);
ArrowDecimalSetInt(&decimal1, -12345600000);
- ArrowDecimalInit(&decimal2, size, 19, 8);
+ ArrowDecimalInit(&decimal2, size, precision, scale);
ArrowDecimalSetInt(&decimal2, 1234);
- ArrowDecimalInit(&decimal3, size, 19, 8);
+ ArrowDecimalInit(&decimal3, size, precision, scale);
ArrowDecimalSetInt(&decimal3, 100000000);
- ArrowDecimalInit(&decimal4, size, 19, 8);
+ ArrowDecimalInit(&decimal4, size, precision, scale);
ArrowDecimalSetInt(&decimal4, 12345600000);
- ArrowDecimalInit(&decimal5, size, 19, 8);
+ ArrowDecimalInit(&decimal5, size, precision, scale);
ArrowDecimalSetInt(&decimal5, 100000000000000);
+ ArrowDecimalInit(&decimal_max_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL);
+
+ ArrowDecimalInit(&decimal_min_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1);
+
+ ArrowDecimalInit(&decimal_zero, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_zero, 0);
+
+ ArrowDecimalInit(&decimal_no_frac, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_no_frac, 123400000000LL); // 1234 * 10^8
+
+ ArrowDecimalInit(&decimal_max_128, size, precision, scale);
+ struct ArrowStringView max_digits_8;
+ max_digits_8.data = "99999999999999999999999999999999999999";
+ max_digits_8.size_bytes = 38;
+ ArrowDecimalSetDigits(&decimal_max_128, max_digits_8);
+
+ ArrowDecimalInit(&decimal_min_128, size, precision, scale);
+ struct ArrowStringView min_digits_8;
+ min_digits_8.data = "-99999999999999999999999999999999999999";
+ min_digits_8.size_bytes = 39;
+ ArrowDecimalSetDigits(&decimal_min_128, min_digits_8);
+
const std::vector<std::optional<ArrowDecimal*>> values = {
- std::nullopt, &decimal1, &decimal2, &decimal3, &decimal4, &decimal5};
+ std::nullopt, &decimal_max_128, &decimal_min_128, &decimal_zero,
+ &decimal_no_frac, &decimal_max_64, &decimal_min_64, &decimal1,
+ &decimal2, &decimal3, &decimal4, &decimal5};
ArrowSchemaInit(&schema.value);
ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0);
@@ -500,6 +546,514 @@ TEST_F(PostgresCopyTest, PostgresCopyWriteNumeric) {
}
}
+// Regression test for bug where 44.123456 with Decimal(10,6) became
4412.345500
+// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES
+// (99999999999999999999999999999999.999999),
+// (-99999999999999999999999999999999.999999),
+// (0), (1000000000000), (9223372036854.775807), (-9223372036854.775808),
+// (44.123456), (0.123456), (123.456789)) AS drvd(col)) TO STDOUT WITH (FORMAT
binary);
+static uint8_t kTestPgCopyNumericScale6[] = {
+ 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00,
0x0a, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x06, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x26, 0xac, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, 0x00, 0x00, 0x06, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00,
0x03, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00,
0x06, 0x00,
+ 0x03, 0x00, 0x00, 0x00, 0x06, 0x00, 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a,
0xc6, 0x1e,
+ 0x4e, 0x02, 0xbc, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00,
0x03, 0x40,
+ 0x00, 0x00, 0x06, 0x00, 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e,
0x4e, 0x03,
+ 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x06, 0x00, 0x2c, 0x04, 0xd2, 0x15, 0xe0, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0c, 0x00,
+ 0x02, 0xff, 0xff, 0x00, 0x00, 0x00, 0x06, 0x04, 0xd2, 0x15, 0xe0, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00,
0x7b, 0x11,
+ 0xd7, 0x22, 0xc4, 0xff, 0xff};
+
+TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale6) {
+ adbc_validation::Handle<struct ArrowSchema> schema;
+ adbc_validation::Handle<struct ArrowArray> array;
+ struct ArrowError na_error;
+ constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128;
+ constexpr int32_t size = 128;
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 6;
+
+ struct ArrowDecimal decimal1;
+ struct ArrowDecimal decimal2;
+ struct ArrowDecimal decimal3;
+ struct ArrowDecimal decimal_max_64;
+ struct ArrowDecimal decimal_min_64;
+ struct ArrowDecimal decimal_zero;
+ struct ArrowDecimal decimal_no_frac;
+ struct ArrowDecimal decimal_max_128;
+ struct ArrowDecimal decimal_min_128;
+
+ ArrowDecimalInit(&decimal1, size, precision, scale);
+ ArrowDecimalSetInt(&decimal1, 44123456);
+
+ ArrowDecimalInit(&decimal2, size, precision, scale);
+ ArrowDecimalSetInt(&decimal2, 123456);
+
+ ArrowDecimalInit(&decimal3, size, precision, scale);
+ ArrowDecimalSetInt(&decimal3, 123456789);
+
+ ArrowDecimalInit(&decimal_max_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL);
+
+ ArrowDecimalInit(&decimal_min_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1);
+
+ ArrowDecimalInit(&decimal_zero, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_zero, 0);
+
+ ArrowDecimalInit(&decimal_no_frac, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_no_frac, 1000000000000000000LL);
+
+ ArrowDecimalInit(&decimal_max_128, size, precision, scale);
+ struct ArrowStringView max_digits;
+ max_digits.data = "99999999999999999999999999999999999999";
+ max_digits.size_bytes = 38;
+ ArrowDecimalSetDigits(&decimal_max_128, max_digits);
+
+ ArrowDecimalInit(&decimal_min_128, size, precision, scale);
+ struct ArrowStringView min_digits;
+ min_digits.data = "-99999999999999999999999999999999999999";
+ min_digits.size_bytes = 39; // 38 digits + 1 for '-' sign
+ ArrowDecimalSetDigits(&decimal_min_128, min_digits);
+
+ const std::vector<std::optional<ArrowDecimal*>> values = {
+ &decimal_max_128, &decimal_min_128, &decimal_zero,
+ &decimal_no_frac, &decimal_max_64, &decimal_min_64,
+ &decimal1, &decimal2, &decimal3};
+
+ ArrowSchemaInit(&schema.value);
+ ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0);
+ ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type,
precision, scale),
+ 0);
+ ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0);
+ ASSERT_EQ(adbc_validation::MakeBatch<ArrowDecimal*>(&schema.value,
&array.value,
+ &na_error, values),
+ ADBC_STATUS_OK);
+
+ PostgresCopyStreamWriteTester tester;
+ ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_),
NANOARROW_OK);
+ ASSERT_EQ(tester.WriteAll(nullptr), ENODATA);
+
+ const struct ArrowBuffer buf = tester.WriteBuffer();
+
+ constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale6) - 2;
+ ASSERT_EQ(buf.size_bytes, static_cast<int64_t>(buf_size));
+
+ for (size_t i = 0; i < buf_size; i++) {
+ ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale6[i]) << " at position " <<
i;
+ }
+}
+
+// Test for scale=5 (remainder 1 when divided by 4)
+// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES
+// (999999999999999999999999999999999.99999),
+// (-999999999999999999999999999999999.99999),
+// (0), (10000000000000), (92233720368547.75807), (-92233720368547.75808),
+// (12.34567), (-9.87654), (0.00123)) AS drvd(col)) TO STDOUT WITH (FORMAT
binary);
+static uint8_t kTestPgCopyNumericScale5[] = {
+ 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1e, 0x00,
0x0b, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x05, 0x00, 0x09, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x23,
0x28, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x0b, 0x00, 0x08, 0x40, 0x00, 0x00,
0x05, 0x00,
+ 0x09, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x23, 0x28, 0x00, 0x01, 0x00, 0x00, 0x00,
0x08, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x0a, 0x00,
+ 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x14, 0x00, 0x06, 0x00, 0x03, 0x00, 0x00, 0x00, 0x05, 0x00, 0x5c, 0x09,
0x21, 0x07,
+ 0xf4, 0x21, 0x63, 0x1d, 0x9c, 0x1b, 0x58, 0x00, 0x01, 0x00, 0x00, 0x00,
0x14, 0x00,
+ 0x06, 0x00, 0x03, 0x40, 0x00, 0x00, 0x05, 0x00, 0x5c, 0x09, 0x21, 0x07,
0xf4, 0x21,
+ 0x63, 0x1d, 0x9c, 0x1f, 0x40, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00,
0x03, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x0c, 0x0d, 0x80, 0x1b, 0x58, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x40, 0x00, 0x00, 0x05, 0x00,
0x09, 0x22,
+ 0x3d, 0x0f, 0xa0, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x02, 0xff,
0xff, 0x00,
+ 0x00, 0x00, 0x05, 0x00, 0x0c, 0x0b, 0xb8, 0xff, 0xff};
+
+TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale5) {
+ adbc_validation::Handle<struct ArrowSchema> schema;
+ adbc_validation::Handle<struct ArrowArray> array;
+ struct ArrowError na_error;
+ constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128;
+ constexpr int32_t size = 128;
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 5;
+
+ struct ArrowDecimal decimal1;
+ struct ArrowDecimal decimal2;
+ struct ArrowDecimal decimal3;
+ struct ArrowDecimal decimal_max_64;
+ struct ArrowDecimal decimal_min_64;
+ struct ArrowDecimal decimal_zero;
+ struct ArrowDecimal decimal_no_frac;
+ struct ArrowDecimal decimal_max_128;
+ struct ArrowDecimal decimal_min_128;
+
+ ArrowDecimalInit(&decimal1, size, precision, scale);
+ ArrowDecimalSetInt(&decimal1, 1234567);
+
+ ArrowDecimalInit(&decimal2, size, precision, scale);
+ ArrowDecimalSetInt(&decimal2, -987654);
+
+ ArrowDecimalInit(&decimal3, size, precision, scale);
+ ArrowDecimalSetInt(&decimal3, 123);
+
+ ArrowDecimalInit(&decimal_max_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL);
+
+ ArrowDecimalInit(&decimal_min_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1);
+
+ ArrowDecimalInit(&decimal_zero, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_zero, 0);
+
+ ArrowDecimalInit(&decimal_no_frac, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_no_frac, 1000000000000000000LL);
+
+ ArrowDecimalInit(&decimal_max_128, size, precision, scale);
+ struct ArrowStringView max_digits_5;
+ max_digits_5.data = "99999999999999999999999999999999999999";
+ max_digits_5.size_bytes = 38;
+ ArrowDecimalSetDigits(&decimal_max_128, max_digits_5);
+
+ ArrowDecimalInit(&decimal_min_128, size, precision, scale);
+ struct ArrowStringView min_digits_5;
+ min_digits_5.data = "-99999999999999999999999999999999999999";
+ min_digits_5.size_bytes = 39;
+ ArrowDecimalSetDigits(&decimal_min_128, min_digits_5);
+
+ const std::vector<std::optional<ArrowDecimal*>> values = {
+ &decimal_max_128, &decimal_min_128, &decimal_zero,
+ &decimal_no_frac, &decimal_max_64, &decimal_min_64,
+ &decimal1, &decimal2, &decimal3};
+
+ ArrowSchemaInit(&schema.value);
+ ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0);
+ ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type,
precision, scale),
+ 0);
+ ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0);
+ ASSERT_EQ(adbc_validation::MakeBatch<ArrowDecimal*>(&schema.value,
&array.value,
+ &na_error, values),
+ ADBC_STATUS_OK);
+
+ PostgresCopyStreamWriteTester tester;
+ ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_),
NANOARROW_OK);
+ ASSERT_EQ(tester.WriteAll(nullptr), ENODATA);
+
+ const struct ArrowBuffer buf = tester.WriteBuffer();
+ constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale5) - 2;
+ ASSERT_EQ(buf.size_bytes, static_cast<int64_t>(buf_size));
+ for (size_t i = 0; i < buf_size; i++) {
+ ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale5[i]) << " at position " <<
i;
+ }
+}
+
+// Test for scale=7 (remainder 3 when divided by 4)
+// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES
+// (9999999999999999999999999999999.9999999),
+// (-9999999999999999999999999999999.9999999),
+// (0), (1000), (922337203685.4775807), (-922337203685.4775808),
+// (5.1234567), (-123.456789), (0.0000001)) AS drvd(col)) TO STDOUT WITH
(FORMAT binary);
+static uint8_t kTestPgCopyNumericScale7[] = {
+ 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00,
0x0a, 0x00,
+ 0x07, 0x00, 0x00, 0x00, 0x07, 0x03, 0xe7, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x06, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x07, 0x40, 0x00, 0x00, 0x07, 0x03,
0xe7, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x03, 0xe8, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00,
0x05, 0x00,
+ 0x02, 0x00, 0x00, 0x00, 0x07, 0x24, 0x07, 0x0e, 0x88, 0x0e, 0x65, 0x12,
0xa7, 0x1f,
+ 0x86, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x02, 0x40,
0x00, 0x00,
+ 0x07, 0x24, 0x07, 0x0e, 0x88, 0x0e, 0x65, 0x12, 0xa7, 0x1f, 0x90, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00,
0x05, 0x04,
+ 0xd2, 0x16, 0x26, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
0x00, 0x40,
+ 0x00, 0x00, 0x06, 0x00, 0x7b, 0x11, 0xd7, 0x22, 0xc4, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x0a, 0x00, 0x01, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x07, 0x00, 0x0a, 0xff,
0xff};
+
+TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale7) {
+ adbc_validation::Handle<struct ArrowSchema> schema;
+ adbc_validation::Handle<struct ArrowArray> array;
+ struct ArrowError na_error;
+ constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128;
+ constexpr int32_t size = 128;
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 7;
+
+ struct ArrowDecimal decimal1;
+ struct ArrowDecimal decimal2;
+ struct ArrowDecimal decimal3;
+ struct ArrowDecimal decimal_max_64;
+ struct ArrowDecimal decimal_min_64;
+ struct ArrowDecimal decimal_zero;
+ struct ArrowDecimal decimal_no_frac;
+ struct ArrowDecimal decimal_max_128;
+ struct ArrowDecimal decimal_min_128;
+
+ ArrowDecimalInit(&decimal1, size, precision, scale);
+ ArrowDecimalSetInt(&decimal1, 51234567);
+
+ // This represents -123.456789, but NUMERIC(10,7) will display it as
-123.4567890
+ ArrowDecimalInit(&decimal2, size, precision, scale);
+ ArrowDecimalSetInt(&decimal2, -1234567890);
+
+ // 0.0000001 with scale=7 -> internal value: 1
+ ArrowDecimalInit(&decimal3, size, precision, scale);
+ ArrowDecimalSetInt(&decimal3, 1);
+
+ ArrowDecimalInit(&decimal_max_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL);
+
+ ArrowDecimalInit(&decimal_min_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1);
+
+ ArrowDecimalInit(&decimal_zero, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_zero, 0);
+
+ ArrowDecimalInit(&decimal_no_frac, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_no_frac, 10000000000LL); // 1000 * 10^7
(1000.0000000)
+
+ ArrowDecimalInit(&decimal_max_128, size, precision, scale);
+ struct ArrowStringView max_digits_7;
+ max_digits_7.data = "99999999999999999999999999999999999999";
+ max_digits_7.size_bytes = 38;
+ ArrowDecimalSetDigits(&decimal_max_128, max_digits_7);
+
+ ArrowDecimalInit(&decimal_min_128, size, precision, scale);
+ struct ArrowStringView min_digits_7;
+ min_digits_7.data = "-99999999999999999999999999999999999999";
+ min_digits_7.size_bytes = 39;
+ ArrowDecimalSetDigits(&decimal_min_128, min_digits_7);
+
+ const std::vector<std::optional<ArrowDecimal*>> values = {
+ &decimal_max_128, &decimal_min_128, &decimal_zero,
+ &decimal_no_frac, &decimal_max_64, &decimal_min_64,
+ &decimal1, &decimal2, &decimal3};
+
+ ArrowSchemaInit(&schema.value);
+ ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0);
+ ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type,
precision, scale),
+ 0);
+ ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0);
+ ASSERT_EQ(adbc_validation::MakeBatch<ArrowDecimal*>(&schema.value,
&array.value,
+ &na_error, values),
+ ADBC_STATUS_OK);
+
+ PostgresCopyStreamWriteTester tester;
+ ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_),
NANOARROW_OK);
+ ASSERT_EQ(tester.WriteAll(nullptr), ENODATA);
+
+ const struct ArrowBuffer buf = tester.WriteBuffer();
+ constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale7) - 2;
+
+ ASSERT_EQ(buf.size_bytes, static_cast<int64_t>(buf_size));
+ for (size_t i = 0; i < buf_size; i++) {
+ ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale7[i]) << " at position " <<
i;
+ }
+}
+
+// Test for scale=0 (integers)
+// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES
+// (99999999999999999999999999999999999999),
+// (-99999999999999999999999999999999999999),
+// (0), (1000000000000000000000000000000000), (9223372036854775807),
+// (-9223372036854775808), (1), (100), (1000), (-100000)) AS drvd(col))
+// TO STDOUT WITH (FORMAT binary);
+static uint8_t kTestPgCopyNumericScale0[] = {
+ 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00,
0x0a, 0x00,
+ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x40, 0x00, 0x00, 0x00, 0x00,
0x63, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00,
0x08, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00,
0x05, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15,
0x65, 0x16,
+ 0xaf, 0x00, 0x01, 0x00, 0x00, 0x00, 0x12, 0x00, 0x05, 0x00, 0x04, 0x40,
0x00, 0x00,
+ 0x00, 0x03, 0x9a, 0x0d, 0x2c, 0x01, 0x70, 0x15, 0x65, 0x16, 0xb0, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x64, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x03, 0xe8, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x01, 0x00,
0x01, 0x40,
+ 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff};
+
+TEST_F(PostgresCopyTest, PostgresCopyWriteNumericScale0) {
+ adbc_validation::Handle<struct ArrowSchema> schema;
+ adbc_validation::Handle<struct ArrowArray> array;
+ struct ArrowError na_error;
+ constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128;
+ constexpr int32_t size = 128;
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = 0;
+
+ struct ArrowDecimal decimal0;
+ struct ArrowDecimal decimal1;
+ struct ArrowDecimal decimal2;
+ struct ArrowDecimal decimal3;
+ struct ArrowDecimal decimal4;
+ struct ArrowDecimal decimal_max_64;
+ struct ArrowDecimal decimal_min_64;
+ struct ArrowDecimal decimal_max_128;
+ struct ArrowDecimal decimal_min_128;
+ struct ArrowDecimal decimal_no_frac;
+
+ ArrowDecimalInit(&decimal0, size, precision, scale);
+ ArrowDecimalSetInt(&decimal0, 0);
+
+ ArrowDecimalInit(&decimal1, size, precision, scale);
+ ArrowDecimalSetInt(&decimal1, 1);
+
+ ArrowDecimalInit(&decimal2, size, precision, scale);
+ ArrowDecimalSetInt(&decimal2, 100);
+
+ ArrowDecimalInit(&decimal3, size, precision, scale);
+ ArrowDecimalSetInt(&decimal3, 1000);
+
+ ArrowDecimalInit(&decimal4, size, precision, scale);
+ ArrowDecimalSetInt(&decimal4, -100000);
+
+ ArrowDecimalInit(&decimal_max_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_max_64, 9223372036854775807LL);
+
+ ArrowDecimalInit(&decimal_min_64, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_min_64, -9223372036854775807LL - 1);
+
+ ArrowDecimalInit(&decimal_max_128, size, precision, scale);
+ struct ArrowStringView max_digits_0;
+ max_digits_0.data = "99999999999999999999999999999999999999";
+ max_digits_0.size_bytes = 38;
+ ArrowDecimalSetDigits(&decimal_max_128, max_digits_0);
+
+ ArrowDecimalInit(&decimal_min_128, size, precision, scale);
+ struct ArrowStringView min_digits_0;
+ min_digits_0.data = "-99999999999999999999999999999999999999";
+ min_digits_0.size_bytes = 39;
+ ArrowDecimalSetDigits(&decimal_min_128, min_digits_0);
+
+ ArrowDecimalInit(&decimal_no_frac, size, precision, scale);
+ struct ArrowStringView no_frac_digits_0;
+ no_frac_digits_0.data = "1000000000000000000000000000000000";
+ no_frac_digits_0.size_bytes = 34;
+ ArrowDecimalSetDigits(&decimal_no_frac, no_frac_digits_0);
+
+ const std::vector<std::optional<ArrowDecimal*>> values = {
+ &decimal_max_128, &decimal_min_128, &decimal0, &decimal_no_frac,
&decimal_max_64,
+ &decimal_min_64, &decimal1, &decimal2, &decimal3,
&decimal4};
+
+ ArrowSchemaInit(&schema.value);
+ ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0);
+ ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type,
precision, scale),
+ 0);
+ ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0);
+ ASSERT_EQ(adbc_validation::MakeBatch<ArrowDecimal*>(&schema.value,
&array.value,
+ &na_error, values),
+ ADBC_STATUS_OK);
+
+ PostgresCopyStreamWriteTester tester;
+ ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_),
NANOARROW_OK);
+ ASSERT_EQ(tester.WriteAll(nullptr), ENODATA);
+
+ const struct ArrowBuffer buf = tester.WriteBuffer();
+ constexpr size_t buf_size = sizeof(kTestPgCopyNumericScale0) - 2;
+ ASSERT_EQ(buf.size_bytes, static_cast<int64_t>(buf_size));
+ for (size_t i = 0; i < buf_size; i++) {
+ ASSERT_EQ(buf.data[i], kTestPgCopyNumericScale0[i]) << " at position " <<
i;
+ }
+}
+
+// Test negative scale
+// COPY (SELECT CAST(col AS NUMERIC) AS col FROM (VALUES
+// (12300), (-12300), (0), (922337203685477580700),
+// (99999999999999999999999999999999999900),
+// (-99999999999999999999999999999999999900))
+// AS drvd(col)) TO STDOUT WITH (FORMAT binary);
+static uint8_t kTestPgCopyNumericNegScale2[] = {
+ 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00,
0x02, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0xfc, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x0c, 0x00, 0x02, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08,
0xfc, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x09, 0x08, 0xb9, 0x1c, 0x23, 0x1a, 0xc6, 0x1e, 0x4e, 0x02, 0xbc, 0x00,
0x01, 0x00,
+ 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00,
0x63, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x26, 0xac, 0x00, 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0a, 0x00,
0x09, 0x40,
+ 0x00, 0x00, 0x00, 0x00, 0x63, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27,
0x0f, 0x27,
+ 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x27, 0x0f, 0x26, 0xac, 0xff, 0xff};
+
+TEST_F(PostgresCopyTest, PostgresCopyWriteNumericNegativeScale) {
+ adbc_validation::Handle<struct ArrowSchema> schema;
+ adbc_validation::Handle<struct ArrowArray> array;
+ struct ArrowError na_error;
+ constexpr enum ArrowType type = NANOARROW_TYPE_DECIMAL128;
+ constexpr int32_t size = 128;
+ constexpr int32_t precision = 38;
+ constexpr int32_t scale = -2;
+
+ struct ArrowDecimal decimal1;
+ struct ArrowDecimal decimal2;
+ struct ArrowDecimal decimal_zero;
+ struct ArrowDecimal decimal_large;
+ struct ArrowDecimal decimal_max_128;
+ struct ArrowDecimal decimal_min_128;
+
+ ArrowDecimalInit(&decimal1, size, precision, scale);
+ ArrowDecimalSetInt(&decimal1, 123);
+
+ ArrowDecimalInit(&decimal2, size, precision, scale);
+ ArrowDecimalSetInt(&decimal2, -123);
+
+ ArrowDecimalInit(&decimal_zero, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_zero, 0);
+
+ ArrowDecimalInit(&decimal_large, size, precision, scale);
+ ArrowDecimalSetInt(&decimal_large, 9223372036854775807LL);
+
+ ArrowDecimalInit(&decimal_max_128, size, precision, scale);
+ struct ArrowStringView max_digits;
+ max_digits.data = "999999999999999999999999999999999999";
+ max_digits.size_bytes = 36;
+ ArrowDecimalSetDigits(&decimal_max_128, max_digits);
+
+ ArrowDecimalInit(&decimal_min_128, size, precision, scale);
+ struct ArrowStringView min_digits;
+ min_digits.data = "-999999999999999999999999999999999999";
+ min_digits.size_bytes = 37; // 36 digits + 1 for '-' sign
+ ArrowDecimalSetDigits(&decimal_min_128, min_digits);
+
+ const std::vector<std::optional<ArrowDecimal*>> values = {
+ &decimal1, &decimal2, &decimal_zero,
+ &decimal_large, &decimal_max_128, &decimal_min_128};
+
+ ArrowSchemaInit(&schema.value);
+ ASSERT_EQ(ArrowSchemaSetTypeStruct(&schema.value, 1), 0);
+ ASSERT_EQ(ArrowSchemaSetTypeDecimal(schema.value.children[0], type,
precision, scale),
+ 0);
+ ASSERT_EQ(ArrowSchemaSetName(schema.value.children[0], "col"), 0);
+ ASSERT_EQ(adbc_validation::MakeBatch<ArrowDecimal*>(&schema.value,
&array.value,
+ &na_error, values),
+ ADBC_STATUS_OK);
+
+ PostgresCopyStreamWriteTester tester;
+ ASSERT_EQ(tester.Init(&schema.value, &array.value, *type_resolver_),
NANOARROW_OK);
+ ASSERT_EQ(tester.WriteAll(nullptr), ENODATA);
+
+ const struct ArrowBuffer buf = tester.WriteBuffer();
+ constexpr size_t buf_size = sizeof(kTestPgCopyNumericNegScale2) - 2;
+ ASSERT_EQ(buf.size_bytes, static_cast<int64_t>(buf_size));
+ for (size_t i = 0; i < buf_size; i++) {
+ ASSERT_EQ(buf.data[i], kTestPgCopyNumericNegScale2[i]) << " at position "
<< i;
+ }
+}
+
using TimestampTestParamType =
std::tuple<enum ArrowTimeUnit, const char*,
std::vector<std::optional<int64_t>>>;
diff --git a/c/driver/postgresql/copy/writer.h
b/c/driver/postgresql/copy/writer.h
index b352635a9..2b31310e7 100644
--- a/c/driver/postgresql/copy/writer.h
+++ b/c/driver/postgresql/copy/writer.h
@@ -17,6 +17,7 @@
#pragma once
+#include <algorithm>
#include <charconv>
#include <cinttypes>
#include <limits>
@@ -224,6 +225,39 @@ class PostgresCopyNumericFieldWriter : public
PostgresCopyFieldWriter {
PostgresCopyNumericFieldWriter(int32_t precision, int32_t scale)
: precision_{precision}, scale_{scale} {}
+ // PostgreSQL NUMERIC Binary Format:
+ // ===================================
+ // PostgreSQL stores NUMERIC values in a variable-length binary format:
+ // - ndigits (int16): Number of base-10000 digits stored
+ // - weight (int16): Position of the first digit group relative to decimal
point
+ // (weight can be negative for small fractional numbers)
+ // - sign (int16): kNumericPos (0x0000) or kNumericNeg (0x4000)
+ // - dscale (int16): Number of decimal digits after the decimal point
(display scale)
+ // - digits[]: Array of int16 values, each 0-9999 (base-10000
representation)
+ //
+ // Value calculation: sum(digits[i] * 10000^(weight - i)); dscale only sets how
+ // many fractional decimal digits are displayed and does not rescale the value
+ //
+ // Example 1: 12300 (from Arrow Decimal value=123, scale=-2)
+ // - Logical representation: "12300"
+ // - Grouped in base-10000: [1][2300]
+ // - ndigits=2, weight=1, sign=0x0000, dscale=0, digits=[1, 2300]
+ // - Calculation: 1*10000^1 + 2300*10000^0 = 10000 + 2300 = 12300
+ //
+ // Example 2: 123.45 (from Arrow Decimal value=12345, scale=2)
+ // - Logical representation: "123.45"
+ // - Integer part "123", fractional part "45"
+ // - Grouped in base-10000: [123][4500] (fractional part right-padded)
+ // - ndigits=2, weight=0, sign=0x0000, dscale=2, digits=[123, 4500]
+ // - Calculation: 123*10000^0 + 4500*10000^(-1) = 123 + 0.45 = 123.45
+ //
+ // Example 3: 0.00123 (from Arrow Decimal value=123, scale=5)
+ // - Logical representation: "0.00123"
+ // - Integer part empty (implicit 0), fractional part "00123"
+ // - Grouped in base-10000: [0012][3000] (last group right-padded with zeros)
+ // - ndigits=2, weight=-1, sign=0x0000, dscale=5, digits=[12, 3000]
+ // - Calculation: 12*10000^(-1) + 3000*10000^(-2) = 0.0012 + 0.00003 = 0.00123
+
ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error)
override {
struct ArrowDecimal decimal;
ArrowDecimalInit(&decimal, bitwidth_, precision_, scale_);
@@ -231,65 +265,81 @@ class PostgresCopyNumericFieldWriter : public
PostgresCopyFieldWriter {
const int16_t sign = ArrowDecimalSign(&decimal) > 0 ? kNumericPos :
kNumericNeg;
- // Number of decimal digits per Postgres digit
- constexpr int kDecDigits = 4;
- std::vector<int16_t> pg_digits;
- int16_t weight = -(scale_ / kDecDigits);
- int16_t dscale = scale_;
- bool seen_decimal = scale_ == 0;
- bool truncating_trailing_zeros = true;
-
- char decimal_string[max_decimal_digits_ + 1];
- int digits_remaining = DecimalToString<bitwidth_>(&decimal,
decimal_string);
- do {
- const int start_pos =
- digits_remaining < kDecDigits ? 0 : digits_remaining - kDecDigits;
- const size_t len = digits_remaining < 4 ? digits_remaining : kDecDigits;
- const std::string_view substr{decimal_string + start_pos, len};
- int16_t val{};
- std::from_chars(substr.data(), substr.data() + substr.size(), val);
-
- if (val == 0) {
- if (!seen_decimal && truncating_trailing_zeros) {
- dscale -= kDecDigits;
- }
- } else {
- pg_digits.insert(pg_digits.begin(), val);
- if (!seen_decimal && truncating_trailing_zeros) {
- if (val % 1000 == 0) {
- dscale -= 3;
- } else if (val % 100 == 0) {
- dscale -= 2;
- } else if (val % 10 == 0) {
- dscale -= 1;
- }
- }
- truncating_trailing_zeros = false;
- }
- digits_remaining -= kDecDigits;
- if (digits_remaining <= 0) {
- break;
- }
- weight++;
-
- if (start_pos <= static_cast<int>(std::strlen(decimal_string)) - scale_)
{
- seen_decimal = true;
- }
- } while (true);
-
- int16_t ndigits = pg_digits.size();
- int32_t field_size_bytes = sizeof(ndigits) + sizeof(weight) + sizeof(sign)
+
+ // Convert decimal to string and split into integer/fractional parts
+ // Example transformation for Arrow Decimal(value=12345, scale=2)
representing 123.45:
+ // Input: decimal.value = 12345, scale_ = 2
+ // After DecimalToString: raw_decimal_string = "12345", original_digits
= 5
+ // After SplitDecimalParts: parts.integer_part = "123"
+ // parts.fractional_part = "45"
+ // parts.effective_scale = 2
+ char raw_decimal_string[max_decimal_digits_ + 1];
+ int original_digits = DecimalToString<bitwidth_>(&decimal,
raw_decimal_string);
+ DecimalParts parts = SplitDecimalParts(raw_decimal_string,
original_digits, scale_);
+
+ // Group into PostgreSQL base-10000 representation
+ // After GroupIntegerDigits: int_digits = [123], weight = 0
+ // (groups "123" right-to-left: "123" → 123, only 1 group so weight =
0)
+ auto [int_digits, weight] = GroupIntegerDigits(parts.integer_part);
+
+ // After GroupFractionalDigits: frac_digits = [4500], final_weight = 0
+ // (groups "45" left-to-right with right-padding: "45" → "4500" → 4500)
+ auto [frac_digits, final_weight] =
+ GroupFractionalDigits(parts.fractional_part, weight,
!parts.integer_part.empty());
+
+ // Combine digit arrays
+ // After combining: all_digits = [123, 4500]
+ std::vector<int16_t> all_digits = int_digits;
+ // GroupIntegerDigits drops all-zero low-order groups (e.g. "10000" -> [1]) while
+ // `weight` still counts them. Restore those zero groups before appending the
+ // fractional groups; otherwise 10000.45 would be emitted as digits=[1, 4500]
+ // with weight=1, which PostgreSQL reads as 14500. If every fractional group is
+ // zero, the trailing-zero trimming below removes the restored groups again, so
+ // the output for integers and for values like 10000.00 is unchanged.
+ if (!parts.integer_part.empty() && !frac_digits.empty()) {
+   all_digits.resize(static_cast<size_t>(weight) + 1, 0);
+ }
+ all_digits.insert(all_digits.end(), frac_digits.begin(), frac_digits.end());
+
+ // Calculate display scale by counting trailing zeros in the DECIMAL STRING
+ // For our example: frac_part="45" has 0 trailing zeros,
effective_scale=2
+ // So dscale = 2 - 0 = 2 (2 fractional digits to display)
+ int trailing_zeros = 0;
+ for (int j = parts.fractional_part.length() - 1;
+ j >= 0 && parts.fractional_part[j] == '0'; j--) {
+ trailing_zeros++;
+ }
+ int16_t dscale =
+ static_cast<int16_t>((std::max)(0, parts.effective_scale -
trailing_zeros));
+
+ // Optimize: remove trailing zero digit groups from fractional part
+ int n_int_digit_groups = int_digits.size();
+ while (static_cast<int>(all_digits.size()) > n_int_digit_groups &&
+ all_digits.back() == 0) {
+ all_digits.pop_back();
+ }
+
+ // Handle zero special case
+ if (all_digits.empty()) {
+ final_weight = 0;
+ dscale = 0;
+ } else if (static_cast<int>(all_digits.size()) <= n_int_digit_groups) {
+ // All fractional digits were removed
+ dscale = 0;
+ }
+
+ if (dscale < 0) dscale = 0;
+
+ // Write PostgreSQL NUMERIC binary format to buffer
+ // Final values for our example: ndigits = 2
+ // final_weight = 0
+ // sign = 0x0000
+ // dscale = 2
+ // digits = [123, 4500]
+ // Binary output represents: 123 * 10000^0 + 4500 * 10000^(-1) = 123 +
0.45 = 123.45
+ int16_t ndigits = all_digits.size();
+ int32_t field_size_bytes = sizeof(ndigits) + sizeof(final_weight) +
sizeof(sign) +
sizeof(dscale) + ndigits * sizeof(int16_t);
NANOARROW_RETURN_NOT_OK(WriteChecked<int32_t>(buffer, field_size_bytes,
error));
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, ndigits, error));
- NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, weight, error));
+ NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, final_weight,
error));
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, sign, error));
NANOARROW_RETURN_NOT_OK(WriteChecked<int16_t>(buffer, dscale, error));
- const size_t pg_digit_bytes = sizeof(int16_t) * pg_digits.size();
+ const size_t pg_digit_bytes = sizeof(int16_t) * all_digits.size();
NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, pg_digit_bytes));
- for (auto pg_digit : pg_digits) {
+ for (auto pg_digit : all_digits) {
WriteUnsafe<int16_t>(buffer, pg_digit);
}
@@ -297,9 +347,19 @@ class PostgresCopyNumericFieldWriter : public
PostgresCopyFieldWriter {
}
private:
- // returns the length of the string
+ // Helper struct for organizing data flow between functions.
+ // Holds the unsigned decimal digit string split at the decimal point, as
+ // produced by SplitDecimalParts.
+ struct DecimalParts {
+ // Digits left of the decimal point; left empty when there are none
+ std::string integer_part; // e.g., "12300" or "123"
+ // Digits right of the decimal point, leading zeros included; left empty when there are none
+ std::string fractional_part; // e.g., "45" or "00123"
+ // Scale clamped to >= 0; a negative scale is instead expressed as zeros appended to integer_part
+ int effective_scale; // Scale after handling negative values
+ };
+
+ // Helper function implementations for decimal-to-PostgreSQL NUMERIC
conversion
+
+ // Convert decimal to string (absolute value, no sign)
+ // Returns the length of the string
template <int32_t DEC_WIDTH>
- int DecimalToString(struct ArrowDecimal* decimal, char* out) {
+ int DecimalToString(struct ArrowDecimal* decimal, char* out) const {
constexpr size_t nwords = (DEC_WIDTH == 128) ? 2 : 4;
uint8_t tmp[DEC_WIDTH / 8];
ArrowDecimalGetBytes(decimal, tmp);
@@ -322,10 +382,9 @@ class PostgresCopyNumericFieldWriter : public
PostgresCopyFieldWriter {
for (size_t i = 0; i < DEC_WIDTH; i++) {
int carry;
- carry = (buf[nwords - 1] >= 0x7FFFFFFFFFFFFFFF);
+ carry = (buf[nwords - 1] > 0x7FFFFFFFFFFFFFFF);
for (size_t j = nwords - 1; j > 0; j--) {
- buf[j] =
- ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j - 1] >=
0x7FFFFFFFFFFFFFFF);
+ buf[j] = ((buf[j] << 1) & 0xFFFFFFFFFFFFFFFF) + (buf[j - 1] >
0x7FFFFFFFFFFFFFFF);
}
buf[0] = ((buf[0] << 1) & 0xFFFFFFFFFFFFFFFF);
@@ -350,6 +409,117 @@ class PostgresCopyNumericFieldWriter : public
PostgresCopyFieldWriter {
return ndigits;
}
+ // Splits an unsigned decimal digit string into integer and fractional digit
+ // strings at the position implied by `scale`.
+ //   decimal_digits: digit characters, most significant first; exactly
+ //                   `digit_count` characters are read
+ //   digit_count:    number of digits available in `decimal_digits`
+ //   scale:          decimal scale; a negative scale appends -scale '0' characters
+ //                   to the integer part and yields no fractional part
+ // Returns DecimalParts with effective_scale = (scale < 0 ? 0 : scale).
+ // Examples: ("12345", 5, 2)  -> {"123", "45", 2}
+ //           ("123", 3, 5)    -> {"", "00123", 5}
+ //           ("123", 3, -2)   -> {"12300", "", 0}
+ DecimalParts SplitDecimalParts(const char* decimal_digits, int digit_count,
+ int scale) const {
+ // Virtual zeros represent the logical zeros appended for negative scale
+ // Example: value=123, scale=-2 → "123" with 2 virtual zeros = "12300"
+ const int virtual_zeros = (scale < 0) ? -scale : 0;
+ const int effective_scale = (scale < 0) ? 0 : scale;
+ const int total_logical_digits = digit_count + virtual_zeros;
+
+ // Calculate split point
+ const int n_int_digits = total_logical_digits > effective_scale
+ ? total_logical_digits - effective_scale
+ : 0;
+ const int n_frac_digits = total_logical_digits - n_int_digits;
+
+ DecimalParts parts;
+ parts.effective_scale = effective_scale;
+
+ // Extract integer part
+ if (n_int_digits > 0) {
+ if (n_int_digits <= digit_count) {
+ // Integer part is within the original digits
+ parts.integer_part.assign(decimal_digits, n_int_digits);
+ } else {
+ // Integer part includes all original digits + virtual zeros
+ parts.integer_part.assign(decimal_digits, digit_count);
+ parts.integer_part.append(virtual_zeros, '0');
+ }
+ }
+
+ // Extract fractional part (only exists if scale > 0)
+ if (n_int_digits == 0 && total_logical_digits < effective_scale) {
+ // Small fractional: 0.00123 needs leading zeros
+ parts.fractional_part.assign(effective_scale - total_logical_digits,
'0');
+ parts.fractional_part.append(decimal_digits, digit_count);
+ } else if (n_frac_digits > 0 && n_int_digits < digit_count) {
+ // Fractional part from remaining digits (virtual zeros don't appear in
fractional
+ // part)
+ parts.fractional_part.assign(decimal_digits + n_int_digits,
+ digit_count - n_int_digits);
+ }
+
+ return parts;
+ }
+
+ // Groups an integer digit string into base-10000 digits, most significant
+ // group first.
+ //   int_part: decimal digits left of the decimal point (may be empty)
+ // Returns {digits, weight}: weight = ceil(length / 4) - 1, or {{}, -1} when
+ // int_part is empty.
+ // NOTE(review): all-zero groups at the least-significant end are left out of
+ // `digits` but are still counted by `weight` (e.g. "10000" -> digits=[1],
+ // weight=1). A caller that appends fractional groups after `digits` has to
+ // account for the omitted groups.
+ std::pair<std::vector<int16_t>, int16_t> GroupIntegerDigits(
+ const std::string& int_part) const {
+ constexpr int kDecDigits = 4;
+ std::vector<int16_t> digits;
+
+ if (int_part.empty()) {
+ return {digits, -1}; // weight = -1 for pure fractional numbers
+ }
+
+ // Calculate weight: ceil(length / 4) - 1
+ int16_t weight = (int_part.length() + kDecDigits - 1) / kDecDigits - 1;
+
+ // Group right-to-left in chunks of 4
+ int i = int_part.length();
+ while (i > 0) {
+ int chunk_size = (std::min)(i, kDecDigits);
+ std::string_view chunk =
+ std::string_view(int_part).substr(i - chunk_size, chunk_size);
+
+ int16_t val{};
+ std::from_chars(chunk.data(), chunk.data() + chunk.size(), val);
+
+ // Skip trailing zeros (zero groups seen before any non-zero group, i.e. the
+ // least-significant ones; zero groups between non-zero groups are kept)
+ if (val != 0 || !digits.empty()) {
+ digits.insert(digits.begin(), val);
+ }
+ i -= chunk_size;
+ }
+
+ return {digits, weight};
+ }
+
+ // Groups a fractional digit string into base-10000 digits, left to right; the
+ // last group is right-padded with '0' to four digits.
+ //   frac_part:        decimal digits right of the decimal point, leading zeros
+ //                     included (may be empty)
+ //   initial_weight:   weight to start from; returned unchanged unless leading
+ //                     zero groups are skipped
+ //   has_integer_part: when false, each leading all-zero group is dropped and the
+ //                     weight is decremented once per dropped group
+ // Returns {digits, weight}.
+ // Examples: ("45", 0, true)          -> {[4500], 0}
+ //           ("00123", -1, false)     -> {[12, 3000], -1}
+ //           ("00001234", -1, false)  -> {[1234], -2}
+ std::pair<std::vector<int16_t>, int16_t> GroupFractionalDigits(
+ const std::string& frac_part, int16_t initial_weight, bool
has_integer_part) const {
+ constexpr int kDecDigits = 4;
+ std::vector<int16_t> digits;
+ int16_t weight = initial_weight;
+
+ if (frac_part.empty()) {
+ return {digits, weight};
+ }
+
+ bool skip_leading_zeros = !has_integer_part;
+
+ // Group left-to-right in chunks of 4, right-padding last chunk
+ for (size_t i = 0; i < frac_part.length(); i += kDecDigits) {
+ int chunk_size = (std::min)(kDecDigits,
static_cast<int>(frac_part.length() - i));
+ std::string chunk_str = frac_part.substr(i, chunk_size);
+
+ // Right-pad to 4 digits (e.g., "45" → "4500")
+ chunk_str.resize(kDecDigits, '0');
+
+ int16_t val{};
+ std::from_chars(chunk_str.data(), chunk_str.data() + chunk_str.size(),
val);
+
+ if (skip_leading_zeros && val == 0) {
+ // Skip leading all-zero groups of a pure fraction (e.g., 0.00001234 → skip
+ // "0000"); a group such as "0012" has value 12 and is kept
+ weight--;
+ } else {
+ digits.push_back(val);
+ skip_leading_zeros = false;
+ }
+ }
+
+ return {digits, weight};
+ }
+
static constexpr uint16_t kNumericPos = 0x0000;
static constexpr uint16_t kNumericNeg = 0x4000;
static constexpr int32_t bitwidth_ = (T == NANOARROW_TYPE_DECIMAL128) ? 128
: 256;
diff --git a/c/driver/postgresql/validation/queries/ingest/decimal.toml
b/c/driver/postgresql/validation/queries/ingest/decimal.toml
deleted file mode 100644
index 0f154e4b4..000000000
--- a/c/driver/postgresql/validation/queries/ingest/decimal.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-skip = "decimal ingest code has a bug and fix has not been merged in yet.
https://github.com/apache/arrow-adbc/pull/3787"
diff --git a/c/driver/postgresql/validation/queries/ingest/decimal.txtcase
b/c/driver/postgresql/validation/queries/ingest/decimal.txtcase
new file mode 100644
index 000000000..81bc7bf87
--- /dev/null
+++ b/c/driver/postgresql/validation/queries/ingest/decimal.txtcase
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// part: expected_schema
+{
+ "format": "+s",
+ "children": [
+ {
+ "name": "idx",
+ "format": "l",
+ "flags": ["nullable"]
+ },
+ {
+ "name": "value",
+ "format": "u",
+ "flags": ["nullable"],
+ "metadata": {
+ "ARROW:extension:name": "arrow.opaque",
+ "ARROW:extension:metadata": "{\"type_name\": \"numeric\",
\"vendor_name\": \"PostgreSQL\"}"
+ }
+ }
+ ]
+}
+
+// part: expected
+
+{"idx": 0, "value": "0"}
+{"idx": 1, "value": "123.45"}
+{"idx": 2, "value": "-123.45"}
+{"idx": 3, "value": "9999999.99"}
+{"idx": 4, "value": "-9999999.99"}
diff --git a/c/driver/postgresql/validation/queries/type/bind/decimal.toml
b/c/driver/postgresql/validation/queries/type/bind/decimal.toml
deleted file mode 100644
index 0f154e4b4..000000000
--- a/c/driver/postgresql/validation/queries/type/bind/decimal.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-skip = "decimal ingest code has a bug and fix has not been merged in yet.
https://github.com/apache/arrow-adbc/pull/3787"
diff --git a/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase
b/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase
new file mode 100644
index 000000000..aac33f269
--- /dev/null
+++ b/c/driver/postgresql/validation/queries/type/bind/decimal.txtcase
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// part: expected_schema
+{
+ "format": "+s",
+ "children": [
+ {
+ "name": "res",
+ "format": "u",
+ "flags": ["nullable"],
+ "metadata": {
+ "ARROW:extension:name": "arrow.opaque",
+ "ARROW:extension:metadata": "{\"type_name\": \"numeric\",
\"vendor_name\": \"PostgreSQL\"}"
+ }
+ }
+ ]
+}