This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d3ccc833a6 GH-36867: [C++] Add a struct_ and schema overload taking a
vector of (name, type) pairs (#36915)
d3ccc833a6 is described below
commit d3ccc833a61b70a988090cd8065d3e38d7c29a89
Author: 谢天 <[email protected]>
AuthorDate: Wed Aug 9 19:33:37 2023 +0800
GH-36867: [C++] Add a struct_ and schema overload taking a vector of (name,
type) pairs (#36915)
### Rationale for this change
Mostly for convenience. It would be nice to be able to write:
```struct_({{"a", int8()}, {"b", utf8()}});```
instead of:
```struct_({field("a", int8()), field("b", utf8())});```
Same with the schema factory.
### What changes are included in this PR?
Add a struct_ overload and two schema overloads taking a vector of (name,
type) pairs to construct a vector of fields.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes. This adds three ARROW_EXPORT functions.
* Closes: #36867
Authored-by: jsjtxietian <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/engine/substrait/type_internal.cc | 7 +++---
cpp/src/arrow/type.cc | 28 ++++++++++++++++++++++++
cpp/src/arrow/type_fwd.h | 29 +++++++++++++++++++++++++
cpp/src/arrow/type_test.cc | 13 +++++++++++
cpp/src/parquet/arrow/schema.cc | 2 +-
5 files changed, 75 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/engine/substrait/type_internal.cc
b/cpp/src/arrow/engine/substrait/type_internal.cc
index 03d1f999a1..1f9141f36b 100644
--- a/cpp/src/arrow/engine/substrait/type_internal.cc
+++ b/cpp/src/arrow/engine/substrait/type_internal.cc
@@ -77,9 +77,10 @@ Result<FieldVector> FieldsFromProto(int size, const Types&
types,
if (types.Get(i).has_struct_()) {
const auto& struct_ = types.Get(i).struct_();
- ARROW_ASSIGN_OR_RAISE(type, FieldsFromProto(struct_.types_size(),
struct_.types(),
- next_name, ext_set,
conversion_options)
- .Map(arrow::struct_));
+ ARROW_ASSIGN_OR_RAISE(
+ auto fields, FieldsFromProto(struct_.types_size(), struct_.types(),
next_name,
+ ext_set, conversion_options));
+ type = ::arrow::struct_(std::move(fields));
nullable = IsNullable(struct_);
} else {
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 9267f1e499..86df91268f 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -276,6 +276,17 @@ std::shared_ptr<Field> MaybePromoteNullTypes(const Field&
existing, const Field&
// `other` must be null.
return existing.WithNullable(true);
}
+
+std::vector<std::shared_ptr<Field>> MakeFields(
+ std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>>
init_list) {
+ std::vector<std::shared_ptr<Field>> fields;
+ fields.reserve(init_list.size());
+ for (const auto& [name, type] : init_list) {
+ fields.push_back(field(name, type));
+ }
+ return fields;
+}
+
} // namespace
Field::~Field() {}
@@ -2125,12 +2136,24 @@ std::shared_ptr<Schema>
schema(std::vector<std::shared_ptr<Field>> fields,
return std::make_shared<Schema>(std::move(fields), std::move(metadata));
}
+std::shared_ptr<Schema> schema(
+ std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>>
fields,
+ std::shared_ptr<const KeyValueMetadata> metadata) {
+ return std::make_shared<Schema>(MakeFields(fields), std::move(metadata));
+}
+
std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
Endianness endianness,
std::shared_ptr<const KeyValueMetadata>
metadata) {
return std::make_shared<Schema>(std::move(fields), endianness,
std::move(metadata));
}
+std::shared_ptr<Schema> schema(
+ std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>>
fields,
+ Endianness endianness, std::shared_ptr<const KeyValueMetadata> metadata) {
+ return std::make_shared<Schema>(MakeFields(fields), endianness,
std::move(metadata));
+}
+
Result<std::shared_ptr<Schema>> UnifySchemas(
const std::vector<std::shared_ptr<Schema>>& schemas,
const Field::MergeOptions field_merge_options) {
@@ -2641,6 +2664,11 @@ std::shared_ptr<DataType> struct_(const
std::vector<std::shared_ptr<Field>>& fie
return std::make_shared<StructType>(fields);
}
+std::shared_ptr<DataType> struct_(
+ std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>>
fields) {
+ return std::make_shared<StructType>(MakeFields(fields));
+}
+
std::shared_ptr<DataType> run_end_encoded(std::shared_ptr<arrow::DataType>
run_end_type,
std::shared_ptr<DataType>
value_type) {
return std::make_shared<RunEndEncodedType>(std::move(run_end_type),
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index a8a27139d1..d3b41c8158 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -560,6 +560,10 @@ ARROW_EXPORT std::shared_ptr<DataType>
time64(TimeUnit::type unit);
ARROW_EXPORT std::shared_ptr<DataType> struct_(
const std::vector<std::shared_ptr<Field>>& fields);
+/// \brief Create a StructType instance from (name, type) pairs
+ARROW_EXPORT std::shared_ptr<DataType> struct_(
+ std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>>
fields);
+
/// \brief Create a RunEndEncodedType instance
ARROW_EXPORT std::shared_ptr<DataType> run_end_encoded(
std::shared_ptr<DataType> run_end_type, std::shared_ptr<DataType>
value_type);
@@ -629,6 +633,18 @@ std::shared_ptr<Schema> schema(
std::vector<std::shared_ptr<Field>> fields,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+/// \brief Create a Schema instance from (name, type) pairs
+///
+/// The schema's fields will all be nullable with no associated metadata.
+///
+/// \param fields (name, type) pairs of the schema's fields
+/// \param metadata any custom key-value metadata, default null
+/// \return schema shared_ptr to Schema
+ARROW_EXPORT
+std::shared_ptr<Schema> schema(
+ std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>>
fields,
+ std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+
/// \brief Create a Schema instance
///
/// \param fields the schema's fields
@@ -640,6 +656,19 @@ std::shared_ptr<Schema> schema(
std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+/// \brief Create a Schema instance from (name, type) pairs
+///
+/// The schema's fields will all be nullable with no associated metadata.
+///
+/// \param fields (name, type) pairs of the schema's fields
+/// \param endianness the endianness of the data
+/// \param metadata any custom key-value metadata, default null
+/// \return schema shared_ptr to Schema
+ARROW_EXPORT
+std::shared_ptr<Schema> schema(
+ std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>>
fields,
+ Endianness endianness, std::shared_ptr<const KeyValueMetadata> metadata =
NULLPTR);
+
/// @}
/// Return the process-wide default memory pool.
diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc
index b008929e87..c55b33b415 100644
--- a/cpp/src/arrow/type_test.cc
+++ b/cpp/src/arrow/type_test.cc
@@ -414,6 +414,13 @@ TEST_F(TestSchema, Basics) {
ASSERT_NE(schema4->fingerprint(), schema7->fingerprint());
ASSERT_EQ(schema6->fingerprint(), schema7->fingerprint());
#endif
+
+ auto schema8 = ::arrow::schema({field("f0", int8()), field("f1", int32())});
+ auto schema9 = ::arrow::schema({{"f0", int8()}, {"f1", int32()}});
+ auto schema10 = ::arrow::schema({{"f2", int8()}, {"f1", int32()}});
+
+ AssertSchemaEqual(schema8, schema9);
+ AssertSchemaNotEqual(schema8, schema10);
}
TEST_F(TestSchema, ToString) {
@@ -1479,6 +1486,12 @@ TEST(TestStructType, Basics) {
ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2:
uint8>");
+ auto t1 = struct_({{"a", int8()}, {"b", utf8()}});
+ auto t2 = struct_({field("a", int8()), field("b", utf8())});
+ auto t3 = struct_({field("c", int8()), field("b", utf8())});
+ ASSERT_TRUE(t1->Equals(t2));
+ ASSERT_TRUE(!t1->Equals(t3));
+
// TODO(wesm): out of bounds for field(...)
}
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index f713548d05..3323b7ff8b 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -839,7 +839,7 @@
std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> GetNestedFactory(
switch (inferred_type.id()) {
case ::arrow::Type::STRUCT:
if (origin_type.id() == ::arrow::Type::STRUCT) {
- return ::arrow::struct_;
+ return [](FieldVector fields) { return
::arrow::struct_(std::move(fields)); };
}
break;
case ::arrow::Type::LIST: