This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 4bbd1332 feat(writer): support length() for both open and closed writers (#433)
4bbd1332 is described below

commit 4bbd133256b397e10dda22984d663e0194b7bbb3
Author: Gang Wu <[email protected]>
AuthorDate: Wed Dec 24 16:13:20 2025 +0800

    feat(writer): support length() for both open and closed writers (#433)
    
    Modified Writer::length() to return current flushed position when open
    and final file length when closed. Previously length() required the
    writer to be closed.
---
 src/iceberg/avro/avro_writer.cc       | 17 ++++++++++-------
 src/iceberg/parquet/parquet_writer.cc | 18 +++++++++++-------
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/src/iceberg/avro/avro_writer.cc b/src/iceberg/avro/avro_writer.cc
index 9fec43a1..9d65db15 100644
--- a/src/iceberg/avro/avro_writer.cc
+++ b/src/iceberg/avro/avro_writer.cc
@@ -120,7 +120,14 @@ class AvroWriter::Impl {
 
   bool Closed() const { return writer_ == nullptr; }
 
-  int64_t length() { return total_bytes_; }
+  Result<int64_t> length() {
+    if (Closed()) {
+      return total_bytes_;
+    }
+    // Return current flushed length when writer is still open
+    ICEBERG_ARROW_ASSIGN_OR_RETURN(auto current_pos, arrow_output_stream_->Tell());
+    return current_pos;
+  }
 
  private:
   // The schema to write.
@@ -135,6 +142,7 @@ class AvroWriter::Impl {
   std::unique_ptr<::avro::GenericDatum> datum_;
   // Arrow schema to write data.
   ArrowSchema arrow_schema_;
+  // Total length of the written Avro file.
   int64_t total_bytes_ = 0;
 };
 
@@ -162,12 +170,7 @@ Result<Metrics> AvroWriter::metrics() {
   return Invalid("AvroWriter is not closed");
 }
 
-Result<int64_t> AvroWriter::length() {
-  if (impl_->Closed()) {
-    return impl_->length();
-  }
-  return Invalid("AvroWriter is not closed");
-}
+Result<int64_t> AvroWriter::length() { return impl_->length(); }
 
 std::vector<int64_t> AvroWriter::split_offsets() { return {}; }
 
diff --git a/src/iceberg/parquet/parquet_writer.cc b/src/iceberg/parquet/parquet_writer.cc
index 44e6133d..886348f0 100644
--- a/src/iceberg/parquet/parquet_writer.cc
+++ b/src/iceberg/parquet/parquet_writer.cc
@@ -107,7 +107,16 @@ class ParquetWriter::Impl {
 
   bool Closed() const { return writer_ == nullptr; }
 
-  int64_t length() const { return total_bytes_; }
+  Result<int64_t> length() {
+    if (Closed()) {
+      return total_bytes_;
+    }
+    // Return current flushed length when writer is still open.
+    // It would be good if we could get the number of buffered bytes
+    // from the internal RowGroupWriter.
+    ICEBERG_ARROW_ASSIGN_OR_RETURN(auto current_pos, output_stream_->Tell());
+    return current_pos;
+  }
 
   std::vector<int64_t> split_offsets() const { return split_offsets_; }
 
@@ -144,12 +153,7 @@ Result<Metrics> ParquetWriter::metrics() {
   return {};
 }
 
-Result<int64_t> ParquetWriter::length() {
-  if (!impl_->Closed()) {
-    return Invalid("ParquetWriter is not closed");
-  }
-  return impl_->length();
-}
+Result<int64_t> ParquetWriter::length() { return impl_->length(); }
 
 std::vector<int64_t> ParquetWriter::split_offsets() {
   if (!impl_->Closed()) {

Reply via email to