This is an automated email from the ASF dual-hosted git repository.

xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 17b5b2d  feat: add Metadata method to Reader and its implementations (#235)
17b5b2d is described below

commit 17b5b2d30e67e314af31bbec5cc70f9798199e1c
Author: chao liu <[email protected]>
AuthorDate: Tue Sep 23 17:54:37 2025 +0800

    feat: add Metadata method to Reader and its implementations (#235)
    
    - Introduced a new virtual method `Metadata()` in the `Reader` class to
    retrieve file metadata.
    - Implemented `Metadata()` in `AvroReader` to return key-value pairs
    from the Avro file's metadata.
    - Implemented `Metadata()` in `ParquetReader` to extract and return
    key-value pairs from the Parquet file's metadata.
    
    ---------
    
    Co-authored-by: nullccxsy <[email protected]>
---
 src/iceberg/avro/avro_reader.cc       | 22 ++++++++++++++++++++++
 src/iceberg/avro/avro_reader.h        |  2 ++
 src/iceberg/file_reader.h             |  3 +++
 src/iceberg/parquet/parquet_reader.cc | 26 ++++++++++++++++++++++++++
 src/iceberg/parquet/parquet_reader.h  |  2 ++
 5 files changed, 55 insertions(+)

diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc
index 048cd49..6452612 100644
--- a/src/iceberg/avro/avro_reader.cc
+++ b/src/iceberg/avro/avro_reader.cc
@@ -173,6 +173,24 @@ class AvroReader::Impl {
     return arrow_schema;
   }
 
+  Result<std::unordered_map<std::string, std::string>> Metadata() {
+    if (reader_ == nullptr) {
+      return Invalid("Reader is not opened");
+    }
+
+    const auto& metadata = reader_->metadata();
+
+    std::unordered_map<std::string, std::string> metadata_map;
+    metadata_map.reserve(metadata.size());
+
+    for (const auto& pair : metadata) {
+      metadata_map.insert_or_assign(pair.first,
+                                    std::string(pair.second.begin(), pair.second.end()));
+    }
+
+    return metadata_map;
+  }
+
  private:
   Status InitReadContext() {
     context_ = std::make_unique<ReadContext>();
@@ -241,6 +259,10 @@ Result<std::optional<ArrowArray>> AvroReader::Next() { return impl_->Next(); }
 
 Result<ArrowSchema> AvroReader::Schema() { return impl_->Schema(); }
 
+Result<std::unordered_map<std::string, std::string>> AvroReader::Metadata() {
+  return impl_->Metadata();
+}
+
 Status AvroReader::Open(const ReaderOptions& options) {
   impl_ = std::make_unique<Impl>();
   return impl_->Open(options);
diff --git a/src/iceberg/avro/avro_reader.h b/src/iceberg/avro/avro_reader.h
index 07737bb..24f95f5 100644
--- a/src/iceberg/avro/avro_reader.h
+++ b/src/iceberg/avro/avro_reader.h
@@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT AvroReader : public Reader {
 
   Result<ArrowSchema> Schema() final;
 
+  Result<std::unordered_map<std::string, std::string>> Metadata() final;
+
  private:
   class Impl;
   std::unique_ptr<Impl> impl_;
diff --git a/src/iceberg/file_reader.h b/src/iceberg/file_reader.h
index 8a59e33..d25a5e4 100644
--- a/src/iceberg/file_reader.h
+++ b/src/iceberg/file_reader.h
@@ -54,6 +54,9 @@ class ICEBERG_EXPORT Reader {
 
   /// \brief Get the schema of the data.
   virtual Result<ArrowSchema> Schema() = 0;
+
+  /// \brief Get the metadata of the file.
+  virtual Result<std::unordered_map<std::string, std::string>> Metadata() = 0;
 };
 
 /// \brief A split of the file to read.
diff --git a/src/iceberg/parquet/parquet_reader.cc b/src/iceberg/parquet/parquet_reader.cc
index 4c86802..e57b98e 100644
--- a/src/iceberg/parquet/parquet_reader.cc
+++ b/src/iceberg/parquet/parquet_reader.cc
@@ -26,6 +26,7 @@
 #include <arrow/record_batch.h>
 #include <arrow/result.h>
 #include <arrow/type.h>
+#include <arrow/util/key_value_metadata.h>
 #include <parquet/arrow/reader.h>
 #include <parquet/arrow/schema.h>
 #include <parquet/file_reader.h>
@@ -185,6 +186,27 @@ class ParquetReader::Impl {
     return arrow_schema;
   }
 
+  Result<std::unordered_map<std::string, std::string>> Metadata() {
+    if (reader_ == nullptr) {
+      return Invalid("Reader is not opened");
+    }
+
+    auto metadata = reader_->parquet_reader()->metadata();
+    if (!metadata) {
+      return Invalid("Failed to get Parquet file metadata");
+    }
+
+    const auto& kv_metadata = metadata->key_value_metadata();
+    if (!kv_metadata) {
+      return std::unordered_map<std::string, std::string>{};
+    }
+
+    std::unordered_map<std::string, std::string> metadata_map;
+    kv_metadata->ToUnorderedMap(&metadata_map);
+
+    return metadata_map;
+  }
+
  private:
   Status InitReadContext() {
     context_ = std::make_unique<ReadContext>();
@@ -251,6 +273,10 @@ Result<std::optional<ArrowArray>> ParquetReader::Next() { return impl_->Next();
 
 Result<ArrowSchema> ParquetReader::Schema() { return impl_->Schema(); }
 
+Result<std::unordered_map<std::string, std::string>> ParquetReader::Metadata() {
+  return impl_->Metadata();
+}
+
 Status ParquetReader::Open(const ReaderOptions& options) {
   impl_ = std::make_unique<Impl>();
   return impl_->Open(options);
diff --git a/src/iceberg/parquet/parquet_reader.h b/src/iceberg/parquet/parquet_reader.h
index 23d34df..0604230 100644
--- a/src/iceberg/parquet/parquet_reader.h
+++ b/src/iceberg/parquet/parquet_reader.h
@@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT ParquetReader : public Reader {
 
   Result<ArrowSchema> Schema() final;
 
+  Result<std::unordered_map<std::string, std::string>> Metadata() final;
+
  private:
   class Impl;
   std::unique_ptr<Impl> impl_;

Reply via email to