pitrou commented on code in PR #41580:
URL: https://github.com/apache/arrow/pull/41580#discussion_r1678100051


##########
cpp/src/parquet/metadata.cc:
##########
@@ -132,6 +132,31 @@ std::shared_ptr<Statistics> MakeColumnStats(const 
format::ColumnMetaData& meta_d
   throw ParquetException("Can't decode page statistics for selected column 
type");
 }
 
+template <typename Metadata>
+std::shared_ptr<KeyValueMetadata> CopyKeyValueMetadata(const Metadata& source) 
{
+  std::shared_ptr<KeyValueMetadata> metadata = nullptr;
+  if (source.__isset.key_value_metadata) {
+    metadata = std::make_shared<KeyValueMetadata>();
+    for (const auto& it : source.key_value_metadata) {
+      metadata->Append(it.key, it.value);
+    }
+  }
+  return metadata;
+}
+
+template <typename Metadata>
+void ToThriftKeyValueMetadata(Metadata& metadata, const KeyValueMetadata& 
source) {
+  std::vector<format::KeyValue> key_value_metadata;
+  key_value_metadata.reserve(static_cast<size_t>(source.size()));
+  for (int64_t i = 0; i < source.size(); ++i) {
+    format::KeyValue kv_pair;
+    kv_pair.__set_key(source.key(i));
+    kv_pair.__set_value(source.value(i));
+    key_value_metadata.emplace_back(kv_pair);

Review Comment:
   Nit: avoid a copy
   ```suggestion
       key_value_metadata.emplace_back(std::move(kv_pair));
   ```



##########
cpp/src/parquet/column_writer_test.cc:
##########
@@ -1705,5 +1719,55 @@ TEST(TestColumnWriter, WriteDataPageV2HeaderNullCount) {
   }
 }
 
+using TestInt32Writer = TestPrimitiveWriter<Int32Type>;
+
+TEST_F(TestInt32Writer, NoWriteKeyValueMetadata) {
+  auto writer = this->BuildWriter();
+  writer->Close();
+  auto key_value_metadata = metadata_key_value_metadata();
+  ASSERT_THAT(key_value_metadata, IsNull());
+}
+
+TEST_F(TestInt32Writer, WriteKeyValueMetadata) {
+  auto writer = this->BuildWriter();
+  writer->AddKeyValueMetadata(KeyValueMetadata::Make({"hello"}, {"world"}));
+  writer->Close();
+  auto key_value_metadata = metadata_key_value_metadata();
+  ASSERT_THAT(key_value_metadata, NotNull());
+  ASSERT_THAT(key_value_metadata->size(), 1);
+  ASSERT_OK_AND_ASSIGN(auto value, key_value_metadata->Get("hello"));
+  ASSERT_THAT(value, "world");

Review Comment:
   Should we use `ASSERT_EQ` here so that the test is more readable? Currently 
it's a bit confusing what's being checked exactly.



##########
cpp/src/parquet/metadata.cc:
##########
@@ -132,6 +132,31 @@ std::shared_ptr<Statistics> MakeColumnStats(const 
format::ColumnMetaData& meta_d
   throw ParquetException("Can't decode page statistics for selected column 
type");
 }
 
+template <typename Metadata>
+std::shared_ptr<KeyValueMetadata> CopyKeyValueMetadata(const Metadata& source) 
{
+  std::shared_ptr<KeyValueMetadata> metadata = nullptr;
+  if (source.__isset.key_value_metadata) {
+    metadata = std::make_shared<KeyValueMetadata>();
+    for (const auto& it : source.key_value_metadata) {
+      metadata->Append(it.key, it.value);
+    }
+  }
+  return metadata;
+}
+
+template <typename Metadata>
+void ToThriftKeyValueMetadata(Metadata& metadata, const KeyValueMetadata& 
source) {

Review Comment:
   We tend to avoid non-const references in the codebase, so I would suggest to 
use a pointer instead:
   ```suggestion
   void ToThriftKeyValueMetadata(Metadata* metadata, const KeyValueMetadata& 
source) {
   ```



##########
cpp/src/parquet/metadata.cc:
##########
@@ -132,6 +132,31 @@ std::shared_ptr<Statistics> MakeColumnStats(const 
format::ColumnMetaData& meta_d
   throw ParquetException("Can't decode page statistics for selected column 
type");
 }
 
+template <typename Metadata>
+std::shared_ptr<KeyValueMetadata> CopyKeyValueMetadata(const Metadata& source) 
{
+  std::shared_ptr<KeyValueMetadata> metadata = nullptr;
+  if (source.__isset.key_value_metadata) {
+    metadata = std::make_shared<KeyValueMetadata>();
+    for (const auto& it : source.key_value_metadata) {
+      metadata->Append(it.key, it.value);
+    }
+  }
+  return metadata;
+}
+
+template <typename Metadata>
+void ToThriftKeyValueMetadata(Metadata& metadata, const KeyValueMetadata& 
source) {
+  std::vector<format::KeyValue> key_value_metadata;
+  key_value_metadata.reserve(static_cast<size_t>(source.size()));
+  for (int64_t i = 0; i < source.size(); ++i) {
+    format::KeyValue kv_pair;
+    kv_pair.__set_key(source.key(i));
+    kv_pair.__set_value(source.value(i));
+    key_value_metadata.emplace_back(kv_pair);
+  }
+  metadata.__set_key_value_metadata(key_value_metadata);

Review Comment:
   ```suggestion
     metadata.__set_key_value_metadata(std::move(key_value_metadata));
   ```



##########
cpp/src/parquet/printer.cc:
##########
@@ -64,6 +64,25 @@ void PrintPageEncodingStats(std::ostream& stream,
 // the fixed initial size is just for an example
 #define COL_WIDTH 30
 
+void Put(std::ostream& stream, char c, int n) {

Review Comment:
   Can you give this a more distinctive name, such as `PutChars`?



##########
cpp/src/parquet/column_writer_test.cc:
##########
@@ -1705,5 +1719,55 @@ TEST(TestColumnWriter, WriteDataPageV2HeaderNullCount) {
   }
 }
 
+using TestInt32Writer = TestPrimitiveWriter<Int32Type>;
+
+TEST_F(TestInt32Writer, NoWriteKeyValueMetadata) {
+  auto writer = this->BuildWriter();
+  writer->Close();
+  auto key_value_metadata = metadata_key_value_metadata();
+  ASSERT_THAT(key_value_metadata, IsNull());
+}
+
+TEST_F(TestInt32Writer, WriteKeyValueMetadata) {
+  auto writer = this->BuildWriter();
+  writer->AddKeyValueMetadata(KeyValueMetadata::Make({"hello"}, {"world"}));
+  writer->Close();
+  auto key_value_metadata = metadata_key_value_metadata();
+  ASSERT_THAT(key_value_metadata, NotNull());
+  ASSERT_THAT(key_value_metadata->size(), 1);
+  ASSERT_OK_AND_ASSIGN(auto value, key_value_metadata->Get("hello"));
+  ASSERT_THAT(value, "world");
+}
+
+TEST_F(TestInt32Writer, ResetKeyValueMetadata) {
+  auto writer = this->BuildWriter();
+  writer->AddKeyValueMetadata(KeyValueMetadata::Make({"hello"}, {"world"}));
+  writer->ResetKeyValueMetadata();
+  writer->Close();
+  auto key_value_metadata = metadata_key_value_metadata();
+  ASSERT_THAT(key_value_metadata, IsNull());
+}
+
+TEST_F(TestInt32Writer, WriteKeyValueMetadataEndToEnd) {
+  auto sink = CreateOutputStream();
+  {
+    auto file_writer = ParquetFileWriter::Open(
+        sink, 
std::dynamic_pointer_cast<schema::GroupNode>(schema_.schema_root()));
+    auto rg_writer = file_writer->AppendRowGroup();
+    auto col_writer = rg_writer->NextColumn();
+    col_writer->AddKeyValueMetadata(KeyValueMetadata::Make({"foo"}, {"bar"}));
+    file_writer->Close();
+  }
+  ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish());
+  auto file_reader =
+      
ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(buffer));
+  auto key_value_metadata =
+      
file_reader->metadata()->RowGroup(0)->ColumnChunk(0)->key_value_metadata();
+  ASSERT_THAT(key_value_metadata, NotNull());
+  ASSERT_THAT(key_value_metadata->size(), 1);
+  ASSERT_OK_AND_ASSIGN(auto value, key_value_metadata->Get("foo"));
+  ASSERT_THAT(value, "bar");

Review Comment:
   Same question here.



##########
cpp/src/parquet/metadata.cc:
##########
@@ -132,6 +132,31 @@ std::shared_ptr<Statistics> MakeColumnStats(const 
format::ColumnMetaData& meta_d
   throw ParquetException("Can't decode page statistics for selected column 
type");
 }
 
+template <typename Metadata>
+std::shared_ptr<KeyValueMetadata> CopyKeyValueMetadata(const Metadata& source) 
{
+  std::shared_ptr<KeyValueMetadata> metadata = nullptr;
+  if (source.__isset.key_value_metadata) {
+    metadata = std::make_shared<KeyValueMetadata>();
+    for (const auto& it : source.key_value_metadata) {
+      metadata->Append(it.key, it.value);
+    }

Review Comment:
   Note that this can probably be made slightly faster if you first build 
vectors of keys and values, then create the `KeyValueMetadata` from those.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to