This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 64f555f56 feat(c/driver/postgresql): return JSON as arrow.json
extension (#4415)
64f555f56 is described below
commit 64f555f56bfc2b89b04530d2bb7e758e6b7ae80e
Author: David Li <[email protected]>
AuthorDate: Sat Jun 20 03:00:13 2026 -0700
feat(c/driver/postgresql): return JSON as arrow.json extension (#4415)
Closes #4382.
Assisted-by: GPT-5.5 <[email protected]>
---
c/driver/postgresql/postgres_type.h | 15 ++++-
c/driver/postgresql/postgres_type_test.cc | 30 +++++++++
c/driver/postgresql/postgresql_test.cc | 8 +++
c/driver/postgresql/validation/README.md | 4 +-
.../validation/queries/type/select/json.txtcase | 73 ++++++++++++++++++++++
.../validation/queries/type/select/jsonb.txtcase | 73 ++++++++++++++++++++++
docs/source/driver/postgresql.rst | 4 +-
7 files changed, 201 insertions(+), 6 deletions(-)
diff --git a/c/driver/postgresql/postgres_type.h
b/c/driver/postgresql/postgres_type.h
index 9768bb1c9..248bf2a13 100644
--- a/c/driver/postgresql/postgres_type.h
+++ b/c/driver/postgresql/postgres_type.h
@@ -256,10 +256,22 @@ class PostgresType {
case PostgresTypeId::kText:
case PostgresTypeId::kName:
case PostgresTypeId::kEnum:
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema,
NANOARROW_TYPE_STRING));
+ break;
case PostgresTypeId::kJson:
- case PostgresTypeId::kJsonb:
+ case PostgresTypeId::kJsonb: {
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema,
NANOARROW_TYPE_STRING));
+ nanoarrow::UniqueBuffer buffer;
+
+ NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(buffer.get(),
nullptr));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowMetadataBuilderAppend(buffer.get(),
ArrowCharView(kExtensionName),
+ ArrowCharView(kJsonExtensionName)));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetMetadata(schema,
reinterpret_cast<char*>(buffer->data)));
+
break;
+ }
case PostgresTypeId::kBytea:
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema,
NANOARROW_TYPE_BINARY));
break;
@@ -346,6 +358,7 @@ class PostgresType {
static constexpr const char* kPostgresTypeKey = "ADBC:postgresql:typname";
static constexpr const char* kExtensionName = "ARROW:extension:name";
static constexpr const char* kOpaqueExtensionName = "arrow.opaque";
+ static constexpr const char* kJsonExtensionName = "arrow.json";
static constexpr const char* kExtensionMetadata = "ARROW:extension:metadata";
ArrowErrorCode AddPostgresTypeMetadata(ArrowSchema* schema,
diff --git a/c/driver/postgresql/postgres_type_test.cc
b/c/driver/postgresql/postgres_type_test.cc
index bd2fdd638..599e63074 100644
--- a/c/driver/postgresql/postgres_type_test.cc
+++ b/c/driver/postgresql/postgres_type_test.cc
@@ -163,6 +163,36 @@ TEST(PostgresTypeTest, PostgresTypeSetSchema) {
EXPECT_STREQ(schema->format, "u");
schema.reset();
+ ArrowSchemaInit(schema.get());
+ EXPECT_EQ(PostgresType(PostgresTypeId::kJson).SetSchema(schema.get()),
NANOARROW_OK);
+ EXPECT_STREQ(schema->format, "u");
+ typnameMetadataValue = ArrowCharView("<not found>");
+ ArrowMetadataGetValue(schema->metadata,
ArrowCharView("ARROW:extension:name"),
+ &typnameMetadataValue);
+ EXPECT_EQ(std::string(typnameMetadataValue.data,
typnameMetadataValue.size_bytes),
+ "arrow.json");
+ typnameMetadataValue = ArrowCharView("<not found>");
+ ArrowMetadataGetValue(schema->metadata,
ArrowCharView("ARROW:extension:metadata"),
+ &typnameMetadataValue);
+ EXPECT_EQ(std::string(typnameMetadataValue.data,
typnameMetadataValue.size_bytes),
+ "<not found>");
+ schema.reset();
+
+ ArrowSchemaInit(schema.get());
+ EXPECT_EQ(PostgresType(PostgresTypeId::kJsonb).SetSchema(schema.get()),
NANOARROW_OK);
+ EXPECT_STREQ(schema->format, "u");
+ typnameMetadataValue = ArrowCharView("<not found>");
+ ArrowMetadataGetValue(schema->metadata,
ArrowCharView("ARROW:extension:name"),
+ &typnameMetadataValue);
+ EXPECT_EQ(std::string(typnameMetadataValue.data,
typnameMetadataValue.size_bytes),
+ "arrow.json");
+ typnameMetadataValue = ArrowCharView("<not found>");
+ ArrowMetadataGetValue(schema->metadata,
ArrowCharView("ARROW:extension:metadata"),
+ &typnameMetadataValue);
+ EXPECT_EQ(std::string(typnameMetadataValue.data,
typnameMetadataValue.size_bytes),
+ "<not found>");
+ schema.reset();
+
ArrowSchemaInit(schema.get());
EXPECT_EQ(PostgresType(PostgresTypeId::kBytea).SetSchema(schema.get()),
NANOARROW_OK);
EXPECT_STREQ(schema->format, "z");
diff --git a/c/driver/postgresql/postgresql_test.cc
b/c/driver/postgresql/postgresql_test.cc
index e376ab959..e2582299b 100644
--- a/c/driver/postgresql/postgresql_test.cc
+++ b/c/driver/postgresql/postgresql_test.cc
@@ -1419,6 +1419,10 @@ TEST_F(PostgresStatementTest, SqlIngestJson) {
ASSERT_NO_FATAL_FAILURE(reader.GetSchema());
ASSERT_EQ(1, reader.fields.size());
ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type);
+ ASSERT_EQ(
+ "arrow.json",
+ std::string_view(reader.fields[0].extension_name.data,
+
static_cast<size_t>(reader.fields[0].extension_name.size_bytes)));
ASSERT_NO_FATAL_FAILURE(reader.Next());
ArrowStringView view =
ArrowArrayViewGetStringUnsafe(reader.array_view->children[0], 0);
@@ -2357,6 +2361,10 @@ TEST_F(PostgresStatementTest, SqlQueryJsonb) {
ASSERT_NO_FATAL_FAILURE(reader.GetSchema());
ASSERT_EQ(1, reader.fields.size());
ASSERT_EQ(NANOARROW_TYPE_STRING, reader.fields[0].type);
+ ASSERT_EQ(
+ "arrow.json",
+ std::string_view(reader.fields[0].extension_name.data,
+
static_cast<size_t>(reader.fields[0].extension_name.size_bytes)));
ASSERT_NO_FATAL_FAILURE(reader.Next());
ArrowStringView view =
ArrowArrayViewGetStringUnsafe(reader.array_view->children[0], 0);
diff --git a/c/driver/postgresql/validation/README.md
b/c/driver/postgresql/validation/README.md
index 9ea9067dc..2d0d86e6c 100644
--- a/c/driver/postgresql/validation/README.md
+++ b/c/driver/postgresql/validation/README.md
@@ -51,9 +51,7 @@ $ export
ADBC_POSTGRESQL_TEST_URI=postgresql://localhost:5432/postgres?user=post
### 5. Run Tests
-#### Using pixi:
-
```bash
cd c/driver/postgresql/validation
-pixi run validate
+uv run pytest
```
diff --git a/c/driver/postgresql/validation/queries/type/select/json.txtcase
b/c/driver/postgresql/validation/queries/type/select/json.txtcase
new file mode 100644
index 000000000..533652129
--- /dev/null
+++ b/c/driver/postgresql/validation/queries/type/select/json.txtcase
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// part: metadata
+
+[setup]
+drop = "test_json"
+
+[tags]
+sql-type-name = "JSON"
+
+// part: setup_query
+
+CREATE TABLE test_json (
+ idx INT,
+ res JSON
+);
+
+INSERT INTO test_json (idx, res) VALUES (1, NULL);
+INSERT INTO test_json (idx, res) VALUES (2, 'null');
+INSERT INTO test_json (idx, res) VALUES (3, '"foobar"');
+INSERT INTO test_json (idx, res) VALUES (4, '1.0');
+INSERT INTO test_json (idx, res) VALUES (5, 'true');
+INSERT INTO test_json (idx, res) VALUES (6, '[]');
+INSERT INTO test_json (idx, res) VALUES (7, '["spam", "eggs", 2.5, false]');
+INSERT INTO test_json (idx, res) VALUES (8, '{"a": 12345, "b": "hello", "c":
[1, 2, 3], "d": {"e": "nested"}}');
+INSERT INTO test_json (idx, res) VALUES (9, '[{"id": 1, "name": "Alice"},
{"id": 2, "name": "Bob"}]');
+
+// part: query
+
+SELECT res FROM test_json ORDER BY idx ASC
+
+// part: expected_schema
+
+{
+ "format": "+s",
+ "children": [
+ {
+ "name": "res",
+ "format": "u",
+ "flags": ["nullable"],
+ "metadata": {
+ "ARROW:extension:name": "arrow.json"
+ }
+ }
+ ]
+}
+
+// part: expected
+
+{"res": null}
+{"res": "null"}
+{"res": "\"foobar\""}
+{"res": "1.0"}
+{"res": "true"}
+{"res": "[]"}
+{"res": "[\"spam\", \"eggs\", 2.5, false]"}
+{"res": "{\"a\": 12345, \"b\": \"hello\", \"c\": [1, 2, 3], \"d\": {\"e\":
\"nested\"}}"}
+{"res": "[{\"id\": 1, \"name\": \"Alice\"}, {\"id\": 2, \"name\": \"Bob\"}]"}
diff --git a/c/driver/postgresql/validation/queries/type/select/jsonb.txtcase
b/c/driver/postgresql/validation/queries/type/select/jsonb.txtcase
new file mode 100644
index 000000000..167f00e89
--- /dev/null
+++ b/c/driver/postgresql/validation/queries/type/select/jsonb.txtcase
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// part: metadata
+
+[setup]
+drop = "test_jsonb"
+
+[tags]
+sql-type-name = "JSONB"
+
+// part: setup_query
+
+CREATE TABLE test_jsonb (
+ idx INT,
+ res JSONB
+);
+
+INSERT INTO test_jsonb (idx, res) VALUES (1, NULL);
+INSERT INTO test_jsonb (idx, res) VALUES (2, 'null');
+INSERT INTO test_jsonb (idx, res) VALUES (3, '"foobar"');
+INSERT INTO test_jsonb (idx, res) VALUES (4, '1.0');
+INSERT INTO test_jsonb (idx, res) VALUES (5, 'true');
+INSERT INTO test_jsonb (idx, res) VALUES (6, '[]');
+INSERT INTO test_jsonb (idx, res) VALUES (7, '["spam", "eggs", 2.5, false]');
+INSERT INTO test_jsonb (idx, res) VALUES (8, '{"a": 12345, "b": "hello", "c":
[1, 2, 3], "d": {"e": "nested"}}');
+INSERT INTO test_jsonb (idx, res) VALUES (9, '[{"id": 1, "name": "Alice"},
{"id": 2, "name": "Bob"}]');
+
+// part: query
+
+SELECT res FROM test_jsonb ORDER BY idx ASC
+
+// part: expected_schema
+
+{
+ "format": "+s",
+ "children": [
+ {
+ "name": "res",
+ "format": "u",
+ "flags": ["nullable"],
+ "metadata": {
+ "ARROW:extension:name": "arrow.json"
+ }
+ }
+ ]
+}
+
+// part: expected
+
+{"res": null}
+{"res": "null"}
+{"res": "\"foobar\""}
+{"res": "1.0"}
+{"res": "true"}
+{"res": "[]"}
+{"res": "[\"spam\", \"eggs\", 2.5, false]"}
+{"res": "{\"a\": 12345, \"b\": \"hello\", \"c\": [1, 2, 3], \"d\": {\"e\":
\"nested\"}}"}
+{"res": "[{\"id\": 1, \"name\": \"Alice\"}, {\"id\": 2, \"name\": \"Bob\"}]"}
diff --git a/docs/source/driver/postgresql.rst
b/docs/source/driver/postgresql.rst
index cc77b84b0..ba1e6b617 100644
--- a/docs/source/driver/postgresql.rst
+++ b/docs/source/driver/postgresql.rst
@@ -264,9 +264,9 @@ being read or written.
* - INTERVAL
- month_day_nano_interval
* - JSON
- - utf8
+ - utf8 (extension<arrow.json>)
* - JSONB
- - utf8
+ - utf8 (extension<arrow.json>)
* - NUMERIC
- utf8 [#numeric-utf8]_
* - REAL