This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 152e878a2e GH-44714: [C++] Keep field metadata for keys and values
when importing a map type via the C data interface (#44715)
152e878a2e is described below
commit 152e878a2e79877ec461f96cefc97663f0bc581f
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Nov 18 14:03:48 2024 +0000
GH-44714: [C++] Keep field metadata for keys and values when importing a
map type via the C data interface (#44715)
### Rationale for this change
Importing a map type from the C data interface drops field metadata
(including extension type information), which does not happen when importing a
map type from IPC or a list of structs. This affects the ability to roundtrip
data through pyarrow/Arrow C++ if extension types are not registered.
### What changes are included in this PR?
The mechanism to import the map type was changed to align with the method
used for IPC import.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
The current behaviour was surprising/inconsistent, so I think this PR
brings it more in line with the current expectation/documentation.
* GitHub Issue: #44714
Authored-by: Dewey Dunnington <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/c/bridge.cc | 8 ++++----
cpp/src/arrow/c/bridge_test.cc | 4 ++++
2 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index f848b34115..6b30802c78 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -1310,13 +1310,13 @@ struct SchemaImporter {
}
bool keys_sorted = (c_struct_->flags & ARROW_FLAG_MAP_KEYS_SORTED);
- bool values_nullable = value_type->field(1)->nullable();
+
// Some implementations of Arrow (such as Rust) use a non-standard field
name
// for key ("keys") and value ("values") fields. For simplicity, we
override
// them on import.
- auto values_field =
- ::arrow::field("value", value_type->field(1)->type(), values_nullable);
- type_ = map(value_type->field(0)->type(), values_field, keys_sorted);
+ type_ =
+ std::make_shared<MapType>(value_type->field(0)->WithName("key"),
+ value_type->field(1)->WithName("value"),
keys_sorted);
return Status::OK();
}
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index bc60b587cf..75fadc7c02 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -3769,6 +3769,10 @@ TEST_F(TestSchemaRoundtrip, RegisteredExtension) {
TEST_F(TestSchemaRoundtrip, Map) {
TestWithTypeFactory([&]() { return map(utf8(), int32()); });
TestWithTypeFactory([&]() { return map(utf8(), field("value", int32(),
false)); });
+ TestWithTypeFactory([&]() {
+ return map(utf8(), field("value", int32(), false,
+ KeyValueMetadata::Make({"meta key"}, {"meta
value"})));
+ });
// Field names are brought in line with the spec on import.
TestWithTypeFactory(
[&]() {