shangxinli commented on code in PR #552:
URL: https://github.com/apache/iceberg-cpp/pull/552#discussion_r2808244251


##########
src/iceberg/data/data_writer.cc:
##########
@@ -19,20 +19,118 @@
 
 #include "iceberg/data/data_writer.h"
 
+#include "iceberg/file_writer.h"
+#include "iceberg/manifest/manifest_entry.h"
+#include "iceberg/util/macros.h"
+
 namespace iceberg {
 
 class DataWriter::Impl {
  public:
+  static Result<std::unique_ptr<Impl>> Make(DataWriterOptions options) {
+    WriterOptions writer_options;
+    writer_options.path = options.path;
+    writer_options.schema = options.schema;
+    writer_options.io = options.io;
+    writer_options.properties = WriterProperties::FromMap(options.properties);
+
+    ICEBERG_ASSIGN_OR_RAISE(auto writer,
+                            WriterFactoryRegistry::Open(options.format, writer_options));
+
+    return std::unique_ptr<Impl>(new Impl(std::move(options), std::move(writer)));
+  }
+
+  Status Write(ArrowArray* data) {
+    ICEBERG_PRECHECK(writer_, "Writer not initialized");
+    return writer_->Write(data);
+  }
+
+  Result<int64_t> Length() const {
+    ICEBERG_PRECHECK(writer_, "Writer not initialized");
+    return writer_->length();
+  }
+
+  Status Close() {
+    ICEBERG_PRECHECK(writer_, "Writer not initialized");
+    if (closed_) {
+      // Idempotent: no-op if already closed
+      return {};
+    }
+    ICEBERG_RETURN_UNEXPECTED(writer_->Close());
+    closed_ = true;
+    return {};
+  }
+
+  Result<FileWriter::WriteResult> Metadata() {
+    ICEBERG_PRECHECK(closed_, "Cannot get metadata before closing the writer");
+
+    ICEBERG_ASSIGN_OR_RAISE(auto metrics, writer_->metrics());
+    ICEBERG_ASSIGN_OR_RAISE(auto length, writer_->length());
+    auto split_offsets = writer_->split_offsets();
+
+    auto data_file = std::make_shared<DataFile>();
+    data_file->content = DataFile::Content::kData;
+    data_file->file_path = options_.path;
+    data_file->file_format = options_.format;
+    data_file->partition = options_.partition;
+    data_file->record_count = metrics.row_count.value_or(0);
+    data_file->file_size_in_bytes = length;
+    data_file->sort_order_id = options_.sort_order_id;
+    data_file->split_offsets = std::move(split_offsets);
+
+    // Convert metrics maps from unordered_map to map
+    for (const auto& [col_id, size] : metrics.column_sizes) {
+      data_file->column_sizes[col_id] = size;

Review Comment:
   That would be a nice cleanup. For now I've simplified... Changing Metrics 
and DataFile to use the same map type would be a good follow-up.
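   For readers following the thread, here is a minimal, self-contained sketch of the conversion under discussion and of what the suggested follow-up could look like. The `MetricsSketch`, `DataFileSketch`, and `UnifiedMetricsSketch` types below are hypothetical stand-ins, not the actual iceberg-cpp classes; the assumption that `Metrics` uses `std::unordered_map` while `DataFile` uses `std::map` is taken from the diff's comment, and only the copy pattern mirrors the code above.

   ```cpp
   #include <cstdint>
   #include <map>
   #include <unordered_map>
   #include <utility>

   // Hypothetical stand-ins for the members touched in the diff; the real
   // Metrics and DataFile classes in iceberg-cpp carry more fields than this.
   struct MetricsSketch {
     std::unordered_map<int32_t, int64_t> column_sizes;  // per-column byte sizes
   };

   struct DataFileSketch {
     std::map<int32_t, int64_t> column_sizes;
   };

   // Current shape: the container types differ, so values are copied
   // element by element (as in the loop shown in the diff).
   void CopyColumnSizes(const MetricsSketch& metrics, DataFileSketch& data_file) {
     for (const auto& [col_id, size] : metrics.column_sizes) {
       data_file.column_sizes[col_id] = size;
     }
   }

   // Possible follow-up shape: if Metrics and DataFile agreed on one map type,
   // each per-field loop would collapse to a plain move/assignment.
   struct UnifiedMetricsSketch {
     std::map<int32_t, int64_t> column_sizes;
   };

   void MoveColumnSizes(UnifiedMetricsSketch&& metrics, DataFileSketch& data_file) {
     data_file.column_sizes = std::move(metrics.column_sizes);
   }
   ```

   The trade-off is the usual one for these containers: `std::map` keeps keys ordered (handy for deterministic serialization of the manifest metadata), while `std::unordered_map` is typically cheaper to populate.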



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

