This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-mosaic.git
The following commit(s) were added to refs/heads/main by this push:
new 2fc6da6 fix: preserve row count for empty projection (#19)
2fc6da6 is described below
commit 2fc6da63fd6fc124d762e901af5c4f6a83733204
Author: QuakeWang <[email protected]>
AuthorDate: Wed May 20 21:08:57 2026 +0800
fix: preserve row count for empty projection (#19)
---
core/src/reader.rs | 15 +++++++++++----
core/src/reader_tests.rs | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+), 4 deletions(-)
diff --git a/core/src/reader.rs b/core/src/reader.rs
index ecf5ea7..251bb87 100644
--- a/core/src/reader.rs
+++ b/core/src/reader.rs
@@ -18,8 +18,7 @@
use std::io;
use std::sync::Arc;
-use arrow_array::ArrayRef;
-use arrow_array::RecordBatch;
+use arrow_array::{ArrayRef, RecordBatch, RecordBatchOptions};
use arrow_schema::{DataType, Field, Schema};
use crate::bucket_reader::{read_typed_value, read_variable_value, BucketReader, ColumnPageReader};
@@ -947,8 +946,16 @@ impl RowGroupReader {
}
let arrow_schema = std::sync::Arc::new(Schema::new(fields));
- RecordBatch::try_new(arrow_schema, batch_arrays)
- .map_err(|e| io::Error::other(e.to_string()))
+ let batch = if batch_arrays.is_empty() {
+ RecordBatch::try_new_with_options(
+ arrow_schema,
+ batch_arrays,
+ &RecordBatchOptions::new().with_row_count(Some(self.num_rows)),
+ )
+ } else {
+ RecordBatch::try_new(arrow_schema, batch_arrays)
+ };
+ batch.map_err(|e| io::Error::other(e.to_string()))
}
}
diff --git a/core/src/reader_tests.rs b/core/src/reader_tests.rs
index 6c68331..6b8fc58 100644
--- a/core/src/reader_tests.rs
+++ b/core/src/reader_tests.rs
@@ -1221,6 +1221,43 @@ fn test_projection_subset() {
}
}
+#[test]
+fn test_projection_empty_columns_preserves_row_count() {
+ let columns = vec![
+ ("a".to_string(), DataType::Int32, true),
+ ("b".to_string(), DataType::Utf8, true),
+ ];
+ let out = MemOutputFile::new();
+ let mut writer = MosaicWriter::new(
+ out,
+ &columns_to_arrow_schema(&columns),
+ WriterOptions {
+ compression: COMPRESSION_ZSTD,
+ num_buckets: 2,
+ ..Default::default()
+ },
+ )
+ .unwrap();
+
+ let rows = vec![
+ vec![Value::Integer(1), Value::String(b"a".to_vec())],
+ vec![Value::Integer(2), Value::String(b"b".to_vec())],
+ vec![Value::Integer(3), Value::String(b"c".to_vec())],
+ ];
+ write_values(&mut writer, &columns, &rows);
+ writer.close().unwrap();
+ let data = writer.output().buf.clone();
+ let len = data.len() as u64;
+ let reader = MosaicReader::new(ByteArrayInputFile::new(data), len).unwrap();
+
+ let mut rg = reader.row_group_reader_projected(0, &[]).unwrap();
+ let batch = rg.read_columns().unwrap();
+
+ assert_eq!(batch.num_columns(), 0);
+ assert_eq!(batch.num_rows(), rows.len());
+ assert!(batch.schema().fields().is_empty());
+}
+
#[test]
fn test_projection_single_column() {
let columns = vec![