This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-mosaic.git


The following commit(s) were added to refs/heads/main by this push:
     new 2fc6da6  fix: preserve row count for empty projection (#19)
2fc6da6 is described below

commit 2fc6da63fd6fc124d762e901af5c4f6a83733204
Author: QuakeWang <[email protected]>
AuthorDate: Wed May 20 21:08:57 2026 +0800

    fix: preserve row count for empty projection (#19)
---
 core/src/reader.rs       | 15 +++++++++++----
 core/src/reader_tests.rs | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/core/src/reader.rs b/core/src/reader.rs
index ecf5ea7..251bb87 100644
--- a/core/src/reader.rs
+++ b/core/src/reader.rs
@@ -18,8 +18,7 @@
 use std::io;
 use std::sync::Arc;
 
-use arrow_array::ArrayRef;
-use arrow_array::RecordBatch;
+use arrow_array::{ArrayRef, RecordBatch, RecordBatchOptions};
 use arrow_schema::{DataType, Field, Schema};
 
 use crate::bucket_reader::{read_typed_value, read_variable_value, BucketReader, ColumnPageReader};
@@ -947,8 +946,16 @@ impl RowGroupReader {
         }
 
         let arrow_schema = std::sync::Arc::new(Schema::new(fields));
-        RecordBatch::try_new(arrow_schema, batch_arrays)
-            .map_err(|e| io::Error::other(e.to_string()))
+        let batch = if batch_arrays.is_empty() {
+            RecordBatch::try_new_with_options(
+                arrow_schema,
+                batch_arrays,
+                &RecordBatchOptions::new().with_row_count(Some(self.num_rows)),
+            )
+        } else {
+            RecordBatch::try_new(arrow_schema, batch_arrays)
+        };
+        batch.map_err(|e| io::Error::other(e.to_string()))
     }
 }
 
diff --git a/core/src/reader_tests.rs b/core/src/reader_tests.rs
index 6c68331..6b8fc58 100644
--- a/core/src/reader_tests.rs
+++ b/core/src/reader_tests.rs
@@ -1221,6 +1221,43 @@ fn test_projection_subset() {
     }
 }
 
+#[test]
+fn test_projection_empty_columns_preserves_row_count() {
+    let columns = vec![
+        ("a".to_string(), DataType::Int32, true),
+        ("b".to_string(), DataType::Utf8, true),
+    ];
+    let out = MemOutputFile::new();
+    let mut writer = MosaicWriter::new(
+        out,
+        &columns_to_arrow_schema(&columns),
+        WriterOptions {
+            compression: COMPRESSION_ZSTD,
+            num_buckets: 2,
+            ..Default::default()
+        },
+    )
+    .unwrap();
+
+    let rows = vec![
+        vec![Value::Integer(1), Value::String(b"a".to_vec())],
+        vec![Value::Integer(2), Value::String(b"b".to_vec())],
+        vec![Value::Integer(3), Value::String(b"c".to_vec())],
+    ];
+    write_values(&mut writer, &columns, &rows);
+    writer.close().unwrap();
+    let data = writer.output().buf.clone();
+    let len = data.len() as u64;
+    let reader = MosaicReader::new(ByteArrayInputFile::new(data), len).unwrap();
+
+    let mut rg = reader.row_group_reader_projected(0, &[]).unwrap();
+    let batch = rg.read_columns().unwrap();
+
+    assert_eq!(batch.num_columns(), 0);
+    assert_eq!(batch.num_rows(), rows.len());
+    assert!(batch.schema().fields().is_empty());
+}
+
 #[test]
 fn test_projection_single_column() {
     let columns = vec![

Reply via email to