Repository: parquet-cpp Updated Branches: refs/heads/master df59ffcf6 -> aacc8b528
PARQUET-842: Do not set unnecessary fields in the parquet::SchemaElement The Impala Parquet scanner can reject our data otherwise Author: Wes McKinney <[email protected]> Closes #226 from wesm/PARQUET-842 and squashes the following commits: 0cfa53c [Wes McKinney] Do not set field_id in SchemaElement until we understand why it is causing parquet-mr problems 4220b48 [Wes McKinney] Do not set unnecessary fields in the parquet::SchemaElement Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/aacc8b52 Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/aacc8b52 Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/aacc8b52 Branch: refs/heads/master Commit: aacc8b52816a660f2fb9af525c66c56f845ddf2c Parents: df59ffc Author: Wes McKinney <[email protected]> Authored: Wed Jan 25 08:14:31 2017 +0100 Committer: Uwe L. Korn <[email protected]> Committed: Wed Jan 25 08:14:31 2017 +0100 ---------------------------------------------------------------------- src/parquet/schema/converter.cc | 2 -- src/parquet/schema/test-util.h | 6 ------ src/parquet/schema/types.cc | 16 ++++++++-------- 3 files changed, 8 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/aacc8b52/src/parquet/schema/converter.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/converter.cc b/src/parquet/schema/converter.cc index 08eeb66..3b18af3 100644 --- a/src/parquet/schema/converter.cc +++ b/src/parquet/schema/converter.cc @@ -96,8 +96,6 @@ class SchemaVisitor : public Node::ConstVisitor { void Visit(const Node* node) override { format::SchemaElement element; node->ToParquet(&element); - // Override field_id here as we can get user-generated Nodes without a valid id - element.__set_field_id(elements_->size()); elements_->push_back(element); if (node->is_group()) { http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/aacc8b52/src/parquet/schema/test-util.h ---------------------------------------------------------------------- diff --git a/src/parquet/schema/test-util.h b/src/parquet/schema/test-util.h index ee9e225..752b8f3 100644 --- a/src/parquet/schema/test-util.h +++ b/src/parquet/schema/test-util.h @@ -42,11 +42,6 @@ static inline SchemaElement NewPrimitive(const std::string& name, result.__set_repetition_type(repetition); result.__set_type(type); result.__set_num_children(0); - result.__set_field_id(id); - // Set default (non-set) values - result.__set_type_length(-1); - result.__set_precision(-1); - result.__set_scale(-1); return result; } @@ -57,7 +52,6 @@ static inline SchemaElement NewGroup(const std::string& name, result.__set_name(name); result.__set_repetition_type(repetition); result.__set_num_children(num_children); - result.__set_field_id(id); return result; } http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/aacc8b52/src/parquet/schema/types.cc ---------------------------------------------------------------------- diff --git a/src/parquet/schema/types.cc b/src/parquet/schema/types.cc index 043ef00..7d452c3 100644 --- a/src/parquet/schema/types.cc +++ b/src/parquet/schema/types.cc @@ -86,16 +86,16 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio physical_type_(type), type_length_(length) { std::stringstream ss; - decimal_metadata_.isset = false; + + // PARQUET-842: In an earlier revision, decimal_metadata_.isset was being + // set to true, but Impala will raise an incompatible metadata in such cases + memset(&decimal_metadata_, 0, sizeof(decimal_metadata_)); + // Check if the physical and logical types match // Mapping referred from Apache parquet-mr as on 2016-02-22 switch (logical_type) { case LogicalType::NONE: // Logical type not set - // Clients should be able to read these values - decimal_metadata_.isset = true; - decimal_metadata_.precision = precision; - decimal_metadata_.scale = scale; break; case LogicalType::UTF8: case LogicalType::JSON: @@ -289,7 +289,6 @@ void GroupNode::ToParquet(void* opaque_element) const { if (logical_type_ != LogicalType::NONE) { element->__set_converted_type(ToThrift(logical_type_)); } - // FIXME: SchemaFlattener does this for us: element->__set_field_id(id_); } void PrimitiveNode::ToParquet(void* opaque_element) const { @@ -302,8 +301,9 @@ void PrimitiveNode::ToParquet(void* opaque_element) const { element->__set_converted_type(ToThrift(logical_type_)); } element->__set_type(ToThrift(physical_type_)); - // FIXME: SchemaFlattener does this for us: element->__set_field_id(id_); - element->__set_type_length(type_length_); + if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) { + element->__set_type_length(type_length_); + } if (decimal_metadata_.isset) { element->__set_precision(decimal_metadata_.precision); element->__set_scale(decimal_metadata_.scale);
