Jefffrey commented on code in PR #10064:
URL: https://github.com/apache/arrow-rs/pull/10064#discussion_r3387341797
##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -5664,4 +5196,241 @@ mod tests {
let cc = file_meta.row_group(0).column(0);
assert!(cc.column_index_range().is_none());
}
+
+ /// Writes a single-column RecordBatch to an in-memory Parquet buffer.
+ fn write_column_to_bytes(array: ArrayRef) -> Bytes {
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "col",
+ array.data_type().clone(),
+ true,
+ )]));
+ let mut buf = Vec::new();
+ let mut writer =
+ ArrowWriter::try_new(&mut buf, schema.clone(),
None).expect("create writer");
+ writer
+ .write(&RecordBatch::try_new(schema, vec![array]).unwrap())
+ .unwrap();
+ writer.close().unwrap();
Review Comment:
```suggestion
let buf = get_bytes_after_close(
schema.clone(),
&RecordBatch::try_new(schema, vec![array]).unwrap(),
);
```
##########
parquet/src/arrow/arrow_writer/levels.rs:
##########
@@ -44,13 +44,44 @@ use crate::column::chunker::CdcChunk;
use crate::column::writer::LevelDataRef;
use crate::errors::{ParquetError, Result};
use arrow_array::cast::AsArray;
-use arrow_array::{Array, ArrayRef, OffsetSizeTrait};
+use arrow_array::types::RunEndIndexType;
+use arrow_array::{Array, ArrayRef, Int32Array, OffsetSizeTrait, RunArray};
use arrow_buffer::bit_iterator::BitIndexIterator;
use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow_schema::{DataType, Field};
use std::ops::Range;
use std::sync::Arc;
+/// Expands a [`DataType::RunEndEncoded`] array into a flat (logical) array of
+/// its values type.
+///
+/// Uses `arrow_select::take` to materialize the full-length flat array.
+/// This is intentionally simple (O(n)); efficiency can/should be improved.
+fn expand_ree_array(array: &ArrayRef) -> Result<ArrayRef> {
+ match array.data_type() {
+ DataType::RunEndEncoded(run_ends_field, _) => match
run_ends_field.data_type() {
+ DataType::Int16 =>
expand_typed_ree(array.as_run::<arrow_array::types::Int16Type>()),
+ DataType::Int32 =>
expand_typed_ree(array.as_run::<arrow_array::types::Int32Type>()),
+ DataType::Int64 =>
expand_typed_ree(array.as_run::<arrow_array::types::Int64Type>()),
+ dt => Err(arrow_err!(
+ "Unsupported run-end type for REE expansion: {}",
+ dt
+ )),
+ },
+ _ => unreachable!("expand_ree_array called on non-REE array"),
+ }
Review Comment:
```suggestion
downcast_run_array!(
array => expand_typed_ree(array),
_ => unreachable!("expand_ree_array called on non-REE array"),
)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]