This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

The following commit(s) were added to refs/heads/master by this push:
     new 1b0aebe  ARROW-9598: [C++][Parquet] Fix writing nullable structs
1b0aebe is described below

commit 1b0aebea45bcd6b271324fcfc373e4ccc7543eaa
Author: Micah Kornfield <emkornfi...@gmail.com>
AuthorDate: Mon Aug 10 15:33:10 2020 -0500

    ARROW-9598: [C++][Parquet] Fix writing nullable structs

    Traverse the node hierarchy to ensure we capture the right value count.

    Closes #7862 from emkornfield/verify_parquetfg

    Authored-by: Micah Kornfield <emkornfi...@gmail.com>
    Signed-off-by: Wes McKinney <w...@apache.org>
---
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 17 +++++++++++++++++
 cpp/src/parquet/column_writer.cc                  |  9 ++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 661ce7b..476d82f 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -2344,6 +2344,23 @@ TEST(ArrowReadWrite, SimpleStructRoundTrip) {
       2);
 }
 
+TEST(ArrowReadWrite, SingleColumnNullableStruct) {
+  auto links =
+      field("Links",
+            ::arrow::struct_({field("Backward", ::arrow::int64(), /*nullable=*/true)}));
+
+  auto links_id_array = ::arrow::ArrayFromJSON(links->type(),
+                                               "[null, "
+                                               "{\"Backward\": 10}"
+                                               "]");
+
+  CheckSimpleRoundtrip(
+      ::arrow::Table::Make(std::make_shared<::arrow::Schema>(
+                               std::vector<std::shared_ptr<::arrow::Field>>{links}),
+                           {links_id_array}),
+      3);
+}
+
 // Disabled until implementation can be finished.
 TEST(TestArrowReadWrite, DISABLED_CanonicalNestedRoundTrip) {
   auto doc_id = field("DocId", ::arrow::int64(), /*nullable=*/false);
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index f9cf37c..6cb0bae 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1138,8 +1138,12 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
     if (descr_->max_definition_level() > 0) {
       // Minimal definition level for which spaced values are written
       int16_t min_spaced_def_level = descr_->max_definition_level();
-      if (descr_->schema_node()->is_optional()) {
-        min_spaced_def_level--;
+      const ::parquet::schema::Node* node = descr_->schema_node().get();
+      while (node != nullptr && !node->is_repeated()) {
+        if (node->is_optional()) {
+          min_spaced_def_level--;
+        }
+        node = node->parent();
       }
       for (int64_t i = 0; i < num_levels; ++i) {
         if (def_levels[i] == descr_->max_definition_level()) {
@@ -1149,7 +1153,6 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
           ++spaced_values_to_write;
         }
       }
-
       WriteDefinitionLevels(num_levels, def_levels);
     } else {
       // Required field, write all values