WillAyd commented on code in PR #1181:
URL: https://github.com/apache/arrow-adbc/pull/1181#discussion_r1355625995
##########
c/driver/postgresql/postgres_copy_reader_test.cc:
##########
@@ -774,6 +774,87 @@ TEST(PostgresCopyUtilsTest, PostgresCopyWriteLargeString) {
}
}
+// COPY (SELECT CAST("col" AS BYTEA) AS "col" FROM ( VALUES ((""),
("\x00\x01"),
+// ("\x01\x02\x03\x04"), ("\xFE\xFF"), (NULL)) AS drvd("col")) TO STDOUT
+// WITH (FORMAT binary);
+static uint8_t kTestPgCopyBinary[] = {
+ 0x50, 0x47, 0x43, 0x4f, 0x50, 0x59, 0x0a, 0xff, 0x0d, 0x0a, 0x00, 0x00,
0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04,
+ 0x01, 0x02, 0x03, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff,
+ 0x00, 0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+
+TEST(PostgresCopyUtilsTest, PostgresCopyReadBinary) {
+ ArrowBufferView data;
+ data.data.as_uint8 = kTestPgCopyBinary;
+ data.size_bytes = sizeof(kTestPgCopyBinary);
+
+ auto col_type = PostgresType(PostgresTypeId::kBytea);
+ PostgresType input_type(PostgresTypeId::kRecord);
+ input_type.AppendChild("col", col_type);
+
+ PostgresCopyStreamTester tester;
+ ASSERT_EQ(tester.Init(input_type), NANOARROW_OK);
+ ASSERT_EQ(tester.ReadAll(&data), ENODATA);
+ ASSERT_EQ(data.data.as_uint8 - kTestPgCopyBinary, sizeof(kTestPgCopyBinary));
+ ASSERT_EQ(data.size_bytes, 0);
+
+ nanoarrow::UniqueArray array;
+ ASSERT_EQ(tester.GetArray(array.get()), NANOARROW_OK);
+ ASSERT_EQ(array->length, 5);
+ ASSERT_EQ(array->n_children, 1);
+
+ auto validity = reinterpret_cast<const
uint8_t*>(array->children[0]->buffers[0]);
+ auto offsets = reinterpret_cast<const
int32_t*>(array->children[0]->buffers[1]);
+ auto data_buffer = reinterpret_cast<const
char*>(array->children[0]->buffers[2]);
+ ASSERT_NE(validity, nullptr);
+ ASSERT_NE(data_buffer, nullptr);
+
+ ASSERT_TRUE(ArrowBitGet(validity, 0));
+ ASSERT_TRUE(ArrowBitGet(validity, 1));
+ ASSERT_TRUE(ArrowBitGet(validity, 2));
+ ASSERT_TRUE(ArrowBitGet(validity, 3));
+ ASSERT_FALSE(ArrowBitGet(validity, 4));
+
+ ASSERT_EQ(offsets[0], 0);
+ ASSERT_EQ(offsets[1], 0);
+ ASSERT_EQ(offsets[2], 0);
+ ASSERT_EQ(offsets[3], 4);
+ ASSERT_EQ(offsets[4], 6);
+ ASSERT_EQ(offsets[5], 6);
+
+ ASSERT_EQ(std::string(data_buffer + 0, 0), "");
+ ASSERT_EQ(std::string(data_buffer + 0, 0), "");
Review Comment:
Ah, nice catch. I need to rework the tests to better account for this,
but yes, it looks like Postgres should still display those bytes (confirmed in
the binary output file as well):
```
SELECT CAST("col" AS BYTEA) AS "col" FROM ( VALUES (''), ('\x0001'),
('\x01020304'), ('\xFEFF'), (NULL)) AS drvd("col");
col
------------
\x
\x0001
\x01020304
\xfeff
```
I think I just need to be careful about constructing a C++ string from that
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]