HippoBaro commented on code in PR #9831:
URL: https://github.com/apache/arrow-rs/pull/9831#discussion_r3174336869
##########
parquet/src/column/writer/mod.rs:
##########
@@ -650,64 +716,100 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a,
E> {
values_offset: usize,
value_indices: Option<&[usize]>,
num_levels: usize,
- def_levels: Option<&[i16]>,
- rep_levels: Option<&[i16]>,
+ def_levels: LevelDataRef<'_>,
+ rep_levels: LevelDataRef<'_>,
) -> Result<usize> {
// Process definition levels and determine how many values to write.
let values_to_write = if self.descr.max_def_level() > 0 {
- let levels = def_levels.ok_or_else(|| {
- general_err!(
- "Definition levels are required, because max definition
level = {}",
- self.descr.max_def_level()
- )
- })?;
-
- let mut values_to_write = 0usize;
let max_def = self.descr.max_def_level();
- let encoder = &mut self.def_levels_encoder;
- match self.page_metrics.definition_level_histogram.as_mut() {
- Some(histogram) => encoder.put_with_observer(levels, |level,
count| {
- values_to_write += count * (level == max_def) as usize;
- histogram.increment_by(level, count as i64);
- }),
- None => encoder.put_with_observer(levels, |level, count| {
- values_to_write += count * (level == max_def) as usize;
- }),
- };
- self.page_metrics.num_page_nulls += (levels.len() -
values_to_write) as u64;
- values_to_write
+ match def_levels {
+ LevelDataRef::Absent => {
+ return Err(general_err!(
+ "Definition levels are required, because max
definition level = {}",
+ self.descr.max_def_level()
+ ));
+ }
+ LevelDataRef::Materialized(levels) => {
+ let mut values_to_write = 0usize;
+ let encoder = &mut self.def_levels_encoder;
+ match
self.page_metrics.definition_level_histogram.as_mut() {
+ Some(histogram) => encoder.put_with_observer(levels,
|level, count| {
+ values_to_write += count * (level == max_def) as
usize;
+ histogram.increment_by(level, count as i64);
+ }),
+ None => encoder.put_with_observer(levels, |level,
count| {
+ values_to_write += count * (level == max_def) as
usize;
+ }),
+ };
+ self.page_metrics.num_page_nulls += (levels.len() -
values_to_write) as u64;
+ values_to_write
+ }
+ LevelDataRef::Uniform { value, count } => {
Review Comment:
Yes! This whole refactor is what lets me add this `Uniform` case, which makes
all-null (or, really, any uniform) data much faster to encode.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]