pitrou commented on code in PR #41633: URL: https://github.com/apache/arrow/pull/41633#discussion_r1617421711
########## cpp/src/parquet/arrow/arrow_metadata_test.cc: ########## @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gtest/gtest.h" + +#include "arrow/table.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/key_value_metadata.h" + +#include "parquet/api/writer.h" + +#include "parquet/arrow/schema.h" +#include "parquet/arrow/writer.h" +#include "parquet/file_writer.h" +#include "parquet/test_util.h" + +namespace parquet::arrow { + +TEST(Metadata, AppendMetadata) { + // A sample table, type and structure does not matter in this test case + auto schema = ::arrow::schema({::arrow::field("f", ::arrow::utf8())}); + auto table = ::arrow::Table::Make( + schema, {::arrow::ArrayFromJSON(::arrow::utf8(), R"(["a", "b", "c"])")}); + + auto sink = CreateOutputStream(); + ArrowWriterProperties::Builder builder; + builder.store_schema(); + ASSERT_OK_AND_ASSIGN(auto writer, + parquet::arrow::FileWriter::Open( + *schema, ::arrow::default_memory_pool(), sink, + parquet::default_writer_properties(), builder.build())); + + auto kv_meta = std::make_shared<KeyValueMetadata>(); + kv_meta->Append("test_key_1", "test_value_1"); + kv_meta->Append("test_key_2", "test_value_2_"); + 
ASSERT_OK(writer->AddKeyValueMetadata(kv_meta)); + + // Key value metadata that will be added to the file. + auto kv_meta_added = std::make_shared<::arrow::KeyValueMetadata>(); + kv_meta_added->Append("test_key_2", "test_value_2"); + kv_meta_added->Append("test_key_3", "test_value_3"); + + ASSERT_OK(writer->AddKeyValueMetadata(kv_meta_added)); + ASSERT_OK(writer->Close()); + + // return error if the file is closed + ASSERT_RAISES(IOError, writer->AddKeyValueMetadata(kv_meta_added)); + + const auto& key_value_metadata = writer->metadata()->key_value_metadata(); + ASSERT_TRUE(nullptr != key_value_metadata); + + // Verify keys that were added before file writer was closed are present. Review Comment: Can we instead read the file to make sure the metadata is here? ########## cpp/src/parquet/arrow/writer.h: ########## @@ -143,6 +143,16 @@ class PARQUET_EXPORT FileWriter { virtual ~FileWriter(); virtual MemoryPool* memory_pool() const = 0; + /// \brief Add key-value metadata to the file. + /// \param[in] key_value_metadata the metadata to add. + /// \note This will overwrite any existing metadata with the same key. + /// \return Error if Close() has been called. + /// + /// WARNING: If `store_schema` is enabled, `ARROW:schema` would be stored + /// in the key-value metadata. Overwriting this key would result in + /// `store_schema` unusable during read. Review Comment: Let's not use the loaded term "undefined behavior" here? I think the original wording is mostly fine, e.g.: ```suggestion /// `store_schema` being unusable during read. ``` ########## cpp/src/parquet/arrow/arrow_metadata_test.cc: ########## @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gtest/gtest.h" + +#include "arrow/table.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/key_value_metadata.h" + +#include "parquet/api/writer.h" + +#include "parquet/arrow/schema.h" +#include "parquet/arrow/writer.h" +#include "parquet/file_writer.h" +#include "parquet/test_util.h" + +namespace parquet::arrow { + +TEST(Metadata, AppendMetadata) { + // A sample table, type and structure does not matter in this test case + auto schema = ::arrow::schema({::arrow::field("f", ::arrow::utf8())}); + auto table = ::arrow::Table::Make( + schema, {::arrow::ArrayFromJSON(::arrow::utf8(), R"(["a", "b", "c"])")}); + + auto sink = CreateOutputStream(); + ArrowWriterProperties::Builder builder; + builder.store_schema(); + ASSERT_OK_AND_ASSIGN(auto writer, + parquet::arrow::FileWriter::Open( + *schema, ::arrow::default_memory_pool(), sink, + parquet::default_writer_properties(), builder.build())); + + auto kv_meta = std::make_shared<KeyValueMetadata>(); + kv_meta->Append("test_key_1", "test_value_1"); + kv_meta->Append("test_key_2", "test_value_2_"); Review Comment: The difference between `test_value_2` and `test_value_2_` is very obscure and easy to miss. Can you make this test clearer? 
##########
cpp/src/parquet/arrow/writer.h:
##########
@@ -143,6 +143,16 @@ class PARQUET_EXPORT FileWriter {
   virtual ~FileWriter();
   virtual MemoryPool* memory_pool() const = 0;
+  /// \brief Add key-value metadata to the file.
+  /// \param[in] key_value_metadata the metadata to add.
+  /// \note This will overwrite any existing metadata with the same key.

Review Comment:
```suggestion
  /// \note This will overwrite any existing metadata with the same key(s).
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
