This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d3ccc833a6 GH-36867: [C++] Add a struct_ and schema overload taking a 
vector of (name, type) pairs (#36915)
d3ccc833a6 is described below

commit d3ccc833a61b70a988090cd8065d3e38d7c29a89
Author: 谢天 <[email protected]>
AuthorDate: Wed Aug 9 19:33:37 2023 +0800

    GH-36867: [C++] Add a struct_ and schema overload taking a vector of (name, 
type) pairs (#36915)
    
    ### Rationale for this change
    Mostly for convenience. It would be nice to be able to write:
    ```struct_({{"a", int8()}, {"b", utf8()}});```
    instead of:
    ```struct_({field("a", int8()), field("b", utf8())});```
    Same with the schema factory.
    
    ### What changes are included in this PR?
    Add a struct_ overload and two schema overloads taking an initializer list of (name, 
type) pairs to construct a vector of fields.
    
    ### Are these changes tested?
    Yes.
    
    ### Are there any user-facing changes?
    Yes. This adds three ARROW_EXPORT functions.
    
    * Closes: #36867
    
    Authored-by: jsjtxietian <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/engine/substrait/type_internal.cc |  7 +++---
 cpp/src/arrow/type.cc                           | 28 ++++++++++++++++++++++++
 cpp/src/arrow/type_fwd.h                        | 29 +++++++++++++++++++++++++
 cpp/src/arrow/type_test.cc                      | 13 +++++++++++
 cpp/src/parquet/arrow/schema.cc                 |  2 +-
 5 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/engine/substrait/type_internal.cc 
b/cpp/src/arrow/engine/substrait/type_internal.cc
index 03d1f999a1..1f9141f36b 100644
--- a/cpp/src/arrow/engine/substrait/type_internal.cc
+++ b/cpp/src/arrow/engine/substrait/type_internal.cc
@@ -77,9 +77,10 @@ Result<FieldVector> FieldsFromProto(int size, const Types& 
types,
     if (types.Get(i).has_struct_()) {
       const auto& struct_ = types.Get(i).struct_();
 
-      ARROW_ASSIGN_OR_RAISE(type, FieldsFromProto(struct_.types_size(), 
struct_.types(),
-                                                  next_name, ext_set, 
conversion_options)
-                                      .Map(arrow::struct_));
+      ARROW_ASSIGN_OR_RAISE(
+          auto fields, FieldsFromProto(struct_.types_size(), struct_.types(), 
next_name,
+                                       ext_set, conversion_options));
+      type = ::arrow::struct_(std::move(fields));
 
       nullable = IsNullable(struct_);
     } else {
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 9267f1e499..86df91268f 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -276,6 +276,17 @@ std::shared_ptr<Field> MaybePromoteNullTypes(const Field& 
existing, const Field&
   // `other` must be null.
   return existing.WithNullable(true);
 }
+
+std::vector<std::shared_ptr<Field>> MakeFields(
+    std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>> 
init_list) {
+  std::vector<std::shared_ptr<Field>> fields;
+  fields.reserve(init_list.size());
+  for (const auto& [name, type] : init_list) {
+    fields.push_back(field(name, type));
+  }
+  return fields;
+}
+
 }  // namespace
 
 Field::~Field() {}
@@ -2125,12 +2136,24 @@ std::shared_ptr<Schema> 
schema(std::vector<std::shared_ptr<Field>> fields,
   return std::make_shared<Schema>(std::move(fields), std::move(metadata));
 }
 
+std::shared_ptr<Schema> schema(
+    std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>> 
fields,
+    std::shared_ptr<const KeyValueMetadata> metadata) {
+  return std::make_shared<Schema>(MakeFields(fields), std::move(metadata));
+}
+
 std::shared_ptr<Schema> schema(std::vector<std::shared_ptr<Field>> fields,
                                Endianness endianness,
                                std::shared_ptr<const KeyValueMetadata> 
metadata) {
   return std::make_shared<Schema>(std::move(fields), endianness, 
std::move(metadata));
 }
 
+std::shared_ptr<Schema> schema(
+    std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>> 
fields,
+    Endianness endianness, std::shared_ptr<const KeyValueMetadata> metadata) {
+  return std::make_shared<Schema>(MakeFields(fields), endianness, 
std::move(metadata));
+}
+
 Result<std::shared_ptr<Schema>> UnifySchemas(
     const std::vector<std::shared_ptr<Schema>>& schemas,
     const Field::MergeOptions field_merge_options) {
@@ -2641,6 +2664,11 @@ std::shared_ptr<DataType> struct_(const 
std::vector<std::shared_ptr<Field>>& fie
   return std::make_shared<StructType>(fields);
 }
 
+std::shared_ptr<DataType> struct_(
+    std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>> 
fields) {
+  return std::make_shared<StructType>(MakeFields(fields));
+}
+
 std::shared_ptr<DataType> run_end_encoded(std::shared_ptr<arrow::DataType> 
run_end_type,
                                           std::shared_ptr<DataType> 
value_type) {
   return std::make_shared<RunEndEncodedType>(std::move(run_end_type),
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index a8a27139d1..d3b41c8158 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -560,6 +560,10 @@ ARROW_EXPORT std::shared_ptr<DataType> 
time64(TimeUnit::type unit);
 ARROW_EXPORT std::shared_ptr<DataType> struct_(
     const std::vector<std::shared_ptr<Field>>& fields);
 
+/// \brief Create a StructType instance from (name, type) pairs
+ARROW_EXPORT std::shared_ptr<DataType> struct_(
+    std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>> 
fields);
+
 /// \brief Create a RunEndEncodedType instance
 ARROW_EXPORT std::shared_ptr<DataType> run_end_encoded(
     std::shared_ptr<DataType> run_end_type, std::shared_ptr<DataType> 
value_type);
@@ -629,6 +633,18 @@ std::shared_ptr<Schema> schema(
     std::vector<std::shared_ptr<Field>> fields,
     std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
 
+/// \brief Create a Schema instance from (name, type) pairs
+///
+/// The schema's fields will all be nullable with no associated metadata.
+///
+/// \param fields (name, type) pairs of the schema's fields
+/// \param metadata any custom key-value metadata, default null
+/// \return schema shared_ptr to Schema
+ARROW_EXPORT
+std::shared_ptr<Schema> schema(
+    std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>> 
fields,
+    std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
+
 /// \brief Create a Schema instance
 ///
 /// \param fields the schema's fields
@@ -640,6 +656,19 @@ std::shared_ptr<Schema> schema(
     std::vector<std::shared_ptr<Field>> fields, Endianness endianness,
     std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);
 
+/// \brief Create a Schema instance from (name, type) pairs
+///
+/// The schema's fields will all be nullable with no associated metadata.
+///
+/// \param fields (name, type) pairs of the schema's fields
+/// \param endianness the endianness of the data
+/// \param metadata any custom key-value metadata, default null
+/// \return schema shared_ptr to Schema
+ARROW_EXPORT
+std::shared_ptr<Schema> schema(
+    std::initializer_list<std::pair<std::string, std::shared_ptr<DataType>>> 
fields,
+    Endianness endianness, std::shared_ptr<const KeyValueMetadata> metadata = 
NULLPTR);
+
 /// @}
 
 /// Return the process-wide default memory pool.
diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc
index b008929e87..c55b33b415 100644
--- a/cpp/src/arrow/type_test.cc
+++ b/cpp/src/arrow/type_test.cc
@@ -414,6 +414,13 @@ TEST_F(TestSchema, Basics) {
   ASSERT_NE(schema4->fingerprint(), schema7->fingerprint());
   ASSERT_EQ(schema6->fingerprint(), schema7->fingerprint());
 #endif
+
+  auto schema8 = ::arrow::schema({field("f0", int8()), field("f1", int32())});
+  auto schema9 = ::arrow::schema({{"f0", int8()}, {"f1", int32()}});
+  auto schema10 = ::arrow::schema({{"f2", int8()}, {"f1", int32()}});
+
+  AssertSchemaEqual(schema8, schema9);
+  AssertSchemaNotEqual(schema8, schema10);
 }
 
 TEST_F(TestSchema, ToString) {
@@ -1479,6 +1486,12 @@ TEST(TestStructType, Basics) {
 
   ASSERT_EQ(struct_type.ToString(), "struct<f0: int32, f1: string, f2: 
uint8>");
 
+  auto t1 = struct_({{"a", int8()}, {"b", utf8()}});
+  auto t2 = struct_({field("a", int8()), field("b", utf8())});
+  auto t3 = struct_({field("c", int8()), field("b", utf8())});
+  ASSERT_TRUE(t1->Equals(t2));
+  ASSERT_TRUE(!t1->Equals(t3));
+
   // TODO(wesm): out of bounds for field(...)
 }
 
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index f713548d05..3323b7ff8b 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -839,7 +839,7 @@ 
std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> GetNestedFactory(
   switch (inferred_type.id()) {
     case ::arrow::Type::STRUCT:
       if (origin_type.id() == ::arrow::Type::STRUCT) {
-        return ::arrow::struct_;
+        return [](FieldVector fields) { return 
::arrow::struct_(std::move(fields)); };
       }
       break;
     case ::arrow::Type::LIST:

Reply via email to