This is an automated email from the ASF dual-hosted git repository.

wjones127 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new a988302495 GH-34242: [C++][Parquet] Optimize comment and move for 
shared_ptr in parquet schema (#34243)
a988302495 is described below

commit a9883024951e1e5a2c587d54d5794b89976690aa
Author: mwish <[email protected]>
AuthorDate: Wed Feb 22 00:56:39 2023 +0800

    GH-34242: [C++][Parquet] Optimize comment and move for shared_ptr in 
parquet schema (#34243)
    
    
    
    ### Rationale for this change
    
    This patch fix some comments, and remove some copying for shared_ptr, using 
reference or `std::move` instead.
    
    ### What changes are included in this PR?
    
    Some comment and shared_ptr usage fix
    
    ### Are these changes tested?
    
    No
    
    ### Are there any user-facing changes?
    
    No
    
    * Closes: #34242
    
    Authored-by: mwish <[email protected]>
    Signed-off-by: Will Jones <[email protected]>
---
 cpp/src/parquet/arrow/schema.cc    | 23 +++++++++++------------
 cpp/src/parquet/level_conversion.h |  7 ++++---
 cpp/src/parquet/schema.h           |  8 ++++----
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index ac827bed20..c5d5e0743a 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -134,8 +134,8 @@ Status StructToNode(const 
std::shared_ptr<::arrow::StructType>& type,
                                   "Consider adding a dummy child field.");
   }
 
-  *out = GroupNode::Make(name, RepetitionFromNullable(nullable), 
std::move(children),
-                         nullptr, field_id);
+  *out = GroupNode::Make(name, RepetitionFromNullable(nullable), children, 
nullptr,
+                         field_id);
   return Status::OK();
 }
 
@@ -514,7 +514,7 @@ Status GroupToStruct(const GroupNode& node, LevelInfo 
current_levels,
   std::vector<std::shared_ptr<Field>> arrow_fields;
   out->children.resize(node.field_count());
   // All level increments for the node are expected to happen by callers.
-  // This is required because repeated elements need to have there own
+  // This is required because repeated elements need to have their own
   // SchemaField.
 
   for (int i = 0; i < node.field_count(); i++) {
@@ -722,7 +722,7 @@ Status GroupToSchemaField(const GroupNode& node, LevelInfo 
current_levels,
     ctx->LinkParent(&out->children[0], out);
     out->level_info = current_levels;
     // At this point current_levels contains this list as the def level, we 
need to
-    // use the previous ancenstor of thi slist.
+    // use the previous ancestor of this list.
     out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
     return Status::OK();
   } else {
@@ -831,7 +831,7 @@ Status GetOriginSchema(const std::shared_ptr<const 
KeyValueMetadata>& metadata,
 }
 
 // Restore original Arrow field information that was serialized as Parquet 
metadata
-// but that is not necessarily present in the field reconstitued from Parquet 
data
+// but that is not necessarily present in the field reconstituted from Parquet 
data
 // (for example, Parquet timestamp types doesn't carry timezone information).
 
 Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* 
inferred);
@@ -876,8 +876,8 @@ Result<bool> ApplyOriginalStorageMetadata(const Field& 
origin_field,
                                           SchemaField* inferred) {
   bool modified = false;
 
-  auto origin_type = origin_field.type();
-  auto inferred_type = inferred->field->type();
+  auto& origin_type = origin_field.type();
+  auto& inferred_type = inferred->field->type();
 
   const int num_children = inferred_type->num_fields();
 
@@ -916,7 +916,7 @@ Result<bool> ApplyOriginalStorageMetadata(const Field& 
origin_field,
 
     // If the data is tz-aware, then set the original time zone, since Parquet
     // has no native storage for timezones
-    if (ts_type.timezone() == "UTC" && ts_origin_type.timezone() != "") {
+    if (ts_type.timezone() == "UTC" && !ts_origin_type.timezone().empty()) {
       if (ts_type.unit() == ts_origin_type.unit()) {
         inferred->field = inferred->field->WithType(origin_type);
       } else {
@@ -937,7 +937,7 @@ Result<bool> ApplyOriginalStorageMetadata(const Field& 
origin_field,
   if (origin_type->id() == ::arrow::Type::DICTIONARY &&
       inferred_type->id() != ::arrow::Type::DICTIONARY &&
       IsDictionaryReadSupported(*inferred_type)) {
-    // Direct dictionary reads are only suppored for a couple primitive types,
+    // Direct dictionary reads are only supported for a couple primitive types,
     // so no need to recurse on value types.
     const auto& dict_origin_type =
         checked_cast<const ::arrow::DictionaryType&>(*origin_type);
@@ -978,8 +978,7 @@ Result<bool> ApplyOriginalStorageMetadata(const Field& 
origin_field,
 Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* 
inferred) {
   bool modified = false;
 
-  auto origin_type = origin_field.type();
-  auto inferred_type = inferred->field->type();
+  auto& origin_type = origin_field.type();
 
   if (origin_type->id() == ::arrow::Type::EXTENSION) {
     const auto& ex_type = checked_cast<const 
::arrow::ExtensionType&>(*origin_type);
@@ -1101,7 +1100,7 @@ Status SchemaManifest::Make(const SchemaDescriptor* 
schema,
       continue;
     }
 
-    auto origin_field = manifest->origin_schema->field(i);
+    auto& origin_field = manifest->origin_schema->field(i);
     RETURN_NOT_OK(ApplyOriginalMetadata(*origin_field, out_field));
   }
   return Status::OK();
diff --git a/cpp/src/parquet/level_conversion.h 
b/cpp/src/parquet/level_conversion.h
index e45a288e8c..dc3b3b64d6 100644
--- a/cpp/src/parquet/level_conversion.h
+++ b/cpp/src/parquet/level_conversion.h
@@ -32,9 +32,10 @@ struct PARQUET_EXPORT LevelInfo {
   LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repetition_level,
             int32_t repeated_ancestor_definition_level)
       : null_slot_usage(null_slots),
-        def_level(definition_level),
-        rep_level(repetition_level),
-        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+        def_level(static_cast<int16_t>(definition_level)),
+        rep_level(static_cast<int16_t>(repetition_level)),
+        repeated_ancestor_def_level(
+            static_cast<int16_t>(repeated_ancestor_definition_level)) {}
 
   bool operator==(const LevelInfo& b) const {
     return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h
index f6c5416a1e..896ec1e479 100644
--- a/cpp/src/parquet/schema.h
+++ b/cpp/src/parquet/schema.h
@@ -189,8 +189,8 @@ class PARQUET_EXPORT Node {
 };
 
 // Save our breath all over the place with these typedefs
-typedef std::shared_ptr<Node> NodePtr;
-typedef std::vector<NodePtr> NodeVector;
+using NodePtr = std::shared_ptr<Node>;
+using NodeVector = std::vector<NodePtr>;
 
 // A type that is one of the primitive Parquet storage types. In addition to
 // the other type metadata (name, repetition level, logical type), also has the
@@ -216,8 +216,8 @@ class PARQUET_EXPORT PrimitiveNode : public Node {
                              std::shared_ptr<const LogicalType> logical_type,
                              Type::type primitive_type, int primitive_length = 
-1,
                              int field_id = -1) {
-    return NodePtr(new PrimitiveNode(name, repetition, logical_type, 
primitive_type,
-                                     primitive_length, field_id));
+    return NodePtr(new PrimitiveNode(name, repetition, std::move(logical_type),
+                                     primitive_type, primitive_length, 
field_id));
   }
 
   bool Equals(const Node* other) const override;

Reply via email to