This is an automated email from the ASF dual-hosted git repository.
wjones127 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new a988302495 GH-34242: [C++][Parquet] Optimize comment and move for
shared_ptr in parquet schema (#34243)
a988302495 is described below
commit a9883024951e1e5a2c587d54d5794b89976690aa
Author: mwish <[email protected]>
AuthorDate: Wed Feb 22 00:56:39 2023 +0800
GH-34242: [C++][Parquet] Optimize comment and move for shared_ptr in
parquet schema (#34243)
### Rationale for this change
This patch fix some comments, and remove some copying for shared_ptr, using
reference or `std::move` instead.
### What changes are included in this PR?
Some comment and shared_ptr usage fix
### Are these changes tested?
No
### Are there any user-facing changes?
No
* Closes: #34242
Authored-by: mwish <[email protected]>
Signed-off-by: Will Jones <[email protected]>
---
cpp/src/parquet/arrow/schema.cc | 23 +++++++++++------------
cpp/src/parquet/level_conversion.h | 7 ++++---
cpp/src/parquet/schema.h | 8 ++++----
3 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index ac827bed20..c5d5e0743a 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -134,8 +134,8 @@ Status StructToNode(const
std::shared_ptr<::arrow::StructType>& type,
"Consider adding a dummy child field.");
}
- *out = GroupNode::Make(name, RepetitionFromNullable(nullable),
std::move(children),
- nullptr, field_id);
+ *out = GroupNode::Make(name, RepetitionFromNullable(nullable), children,
nullptr,
+ field_id);
return Status::OK();
}
@@ -514,7 +514,7 @@ Status GroupToStruct(const GroupNode& node, LevelInfo
current_levels,
std::vector<std::shared_ptr<Field>> arrow_fields;
out->children.resize(node.field_count());
// All level increments for the node are expected to happen by callers.
- // This is required because repeated elements need to have there own
+ // This is required because repeated elements need to have their own
// SchemaField.
for (int i = 0; i < node.field_count(); i++) {
@@ -722,7 +722,7 @@ Status GroupToSchemaField(const GroupNode& node, LevelInfo
current_levels,
ctx->LinkParent(&out->children[0], out);
out->level_info = current_levels;
// At this point current_levels contains this list as the def level, we
need to
- // use the previous ancenstor of thi slist.
+ // use the previous ancestor of this list.
out->level_info.repeated_ancestor_def_level = repeated_ancestor_def_level;
return Status::OK();
} else {
@@ -831,7 +831,7 @@ Status GetOriginSchema(const std::shared_ptr<const
KeyValueMetadata>& metadata,
}
// Restore original Arrow field information that was serialized as Parquet
metadata
-// but that is not necessarily present in the field reconstitued from Parquet
data
+// but that is not necessarily present in the field reconstituted from Parquet
data
// (for example, Parquet timestamp types doesn't carry timezone information).
Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField*
inferred);
@@ -876,8 +876,8 @@ Result<bool> ApplyOriginalStorageMetadata(const Field&
origin_field,
SchemaField* inferred) {
bool modified = false;
- auto origin_type = origin_field.type();
- auto inferred_type = inferred->field->type();
+ auto& origin_type = origin_field.type();
+ auto& inferred_type = inferred->field->type();
const int num_children = inferred_type->num_fields();
@@ -916,7 +916,7 @@ Result<bool> ApplyOriginalStorageMetadata(const Field&
origin_field,
// If the data is tz-aware, then set the original time zone, since Parquet
// has no native storage for timezones
- if (ts_type.timezone() == "UTC" && ts_origin_type.timezone() != "") {
+ if (ts_type.timezone() == "UTC" && !ts_origin_type.timezone().empty()) {
if (ts_type.unit() == ts_origin_type.unit()) {
inferred->field = inferred->field->WithType(origin_type);
} else {
@@ -937,7 +937,7 @@ Result<bool> ApplyOriginalStorageMetadata(const Field&
origin_field,
if (origin_type->id() == ::arrow::Type::DICTIONARY &&
inferred_type->id() != ::arrow::Type::DICTIONARY &&
IsDictionaryReadSupported(*inferred_type)) {
- // Direct dictionary reads are only suppored for a couple primitive types,
+ // Direct dictionary reads are only supported for a couple primitive types,
// so no need to recurse on value types.
const auto& dict_origin_type =
checked_cast<const ::arrow::DictionaryType&>(*origin_type);
@@ -978,8 +978,7 @@ Result<bool> ApplyOriginalStorageMetadata(const Field&
origin_field,
Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField*
inferred) {
bool modified = false;
- auto origin_type = origin_field.type();
- auto inferred_type = inferred->field->type();
+ auto& origin_type = origin_field.type();
if (origin_type->id() == ::arrow::Type::EXTENSION) {
const auto& ex_type = checked_cast<const
::arrow::ExtensionType&>(*origin_type);
@@ -1101,7 +1100,7 @@ Status SchemaManifest::Make(const SchemaDescriptor*
schema,
continue;
}
- auto origin_field = manifest->origin_schema->field(i);
+ auto& origin_field = manifest->origin_schema->field(i);
RETURN_NOT_OK(ApplyOriginalMetadata(*origin_field, out_field));
}
return Status::OK();
diff --git a/cpp/src/parquet/level_conversion.h
b/cpp/src/parquet/level_conversion.h
index e45a288e8c..dc3b3b64d6 100644
--- a/cpp/src/parquet/level_conversion.h
+++ b/cpp/src/parquet/level_conversion.h
@@ -32,9 +32,10 @@ struct PARQUET_EXPORT LevelInfo {
LevelInfo(int32_t null_slots, int32_t definition_level, int32_t
repetition_level,
int32_t repeated_ancestor_definition_level)
: null_slot_usage(null_slots),
- def_level(definition_level),
- rep_level(repetition_level),
- repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+ def_level(static_cast<int16_t>(definition_level)),
+ rep_level(static_cast<int16_t>(repetition_level)),
+ repeated_ancestor_def_level(
+ static_cast<int16_t>(repeated_ancestor_definition_level)) {}
bool operator==(const LevelInfo& b) const {
return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
diff --git a/cpp/src/parquet/schema.h b/cpp/src/parquet/schema.h
index f6c5416a1e..896ec1e479 100644
--- a/cpp/src/parquet/schema.h
+++ b/cpp/src/parquet/schema.h
@@ -189,8 +189,8 @@ class PARQUET_EXPORT Node {
};
// Save our breath all over the place with these typedefs
-typedef std::shared_ptr<Node> NodePtr;
-typedef std::vector<NodePtr> NodeVector;
+using NodePtr = std::shared_ptr<Node>;
+using NodeVector = std::vector<NodePtr>;
// A type that is one of the primitive Parquet storage types. In addition to
// the other type metadata (name, repetition level, logical type), also has the
@@ -216,8 +216,8 @@ class PARQUET_EXPORT PrimitiveNode : public Node {
std::shared_ptr<const LogicalType> logical_type,
Type::type primitive_type, int primitive_length =
-1,
int field_id = -1) {
- return NodePtr(new PrimitiveNode(name, repetition, logical_type,
primitive_type,
- primitive_length, field_id));
+ return NodePtr(new PrimitiveNode(name, repetition, std::move(logical_type),
+ primitive_type, primitive_length,
field_id));
}
bool Equals(const Node* other) const override;