yordan-pavlov commented on a change in pull request #1041:
URL: https://github.com/apache/arrow-rs/pull/1041#discussion_r775284993



##########
File path: parquet/src/arrow/record_reader/definition_levels.rs
##########
@@ -0,0 +1,82 @@
+use arrow::array::BooleanBufferBuilder;
+use arrow::bitmap::Bitmap;
+use arrow::buffer::Buffer;
+use std::ops::Range;
+
+use crate::column::reader::decoder::ColumnLevelDecoderImpl;
+use crate::schema::types::ColumnDescPtr;
+
+use super::{
+    buffer::{RecordBuffer, TypedBuffer},
+    MIN_BATCH_SIZE,
+};
+
+/// Buffers the definition levels of a column together with a bitmask that is
+/// derived from them.
+///
+/// For every committed level the bitmask holds one bit, which is set when the
+/// level equals `max_level` (presumably marking a non-null value - confirm
+/// against the record reader that consumes it).
+pub struct DefinitionLevelBuffer {
+    /// The definition levels themselves, stored as `i16`.
+    buffer: TypedBuffer<i16>,
+    /// One bit per committed level: `level == max_level`.
+    builder: BooleanBufferBuilder,
+    /// The maximum definition level, taken from the column descriptor.
+    max_level: i16,
+}
+
+impl RecordBuffer for DefinitionLevelBuffer {
+    type Output = Buffer;
+    type Writer = [i16];
+
+    /// Delegates to the level buffer's `split`, passing `len` through.
+    ///
+    /// Only the level buffer is touched here; the bitmask builder is split
+    /// separately.
+    fn split(&mut self, len: usize) -> Self::Output {
+        let levels = &mut self.buffer;
+        levels.split(len)
+    }
+
+    /// Returns the level buffer's writer for `batch_size` levels.
+    ///
+    /// Panics if the level buffer and the bitmask builder disagree on length.
+    fn writer(&mut self, batch_size: usize) -> &mut Self::Writer {
+        // Every buffered level must already have its bit in the bitmask.
+        let level_count = self.buffer.len();
+        let bit_count = self.builder.len();
+        assert_eq!(level_count, bit_count);
+        self.buffer.writer(batch_size)
+    }
+
+    /// Commits `len` to the level buffer, then appends one bit to the bitmask
+    /// for each level between the builder's current length and `len`.
+    ///
+    /// A bit is set when the corresponding level equals `max_level`.
+    fn commit(&mut self, len: usize) {
+        self.buffer.commit(len);
+        let levels = self.buffer.as_slice();
+
+        // Levels before `start` already have a bit in the builder.
+        let start = self.builder.len();
+        let end = len;
+        self.builder.reserve(end - start);
+
+        let max_level = self.max_level;
+        for level in levels[start..end].iter().copied() {
+            self.builder.append(level == max_level);
+        }
+    }
+}
+
+impl DefinitionLevelBuffer {
+    /// Creates an empty buffer whose maximum definition level is read from
+    /// the column descriptor `desc`.
+    pub fn new(desc: &ColumnDescPtr) -> Self {
+        let max_level = desc.max_def_level();
+        let builder = BooleanBufferBuilder::new(0);
+
+        Self {
+            buffer: TypedBuffer::new(),
+            builder,
+            max_level,
+        }
+    }
+
+    /// Split `len` levels out of the bitmask held by `self`.
+    ///
+    /// The current builder is swapped for a fresh one and finished into a
+    /// [`Bitmap`], which is returned. The bits at positions `len..old_len`
+    /// are copied into the fresh builder so that they stay buffered in `self`.
+    ///
+    /// The returned bitmap is built from the entire previous builder, so it
+    /// also carries the bits past `len`; callers are presumably expected to
+    /// read only the first `len` bits (confirm against the call sites).
+    ///
+    /// This method does not touch the level buffer.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `len` is greater than the number of bits currently buffered.
+    pub fn split_bitmask(&mut self, len: usize) -> Bitmap {
+        let old_len = self.builder.len();
+        // Without this check `old_len - len` would wrap around in release
+        // builds and request an enormous capacity for the new builder.
+        assert!(
+            len <= old_len,
+            "cannot split {} levels out of a bitmask holding only {}",
+            len,
+            old_len
+        );
+        let num_left_values = old_len - len;
+        let new_bitmap_builder =
+            BooleanBufferBuilder::new(MIN_BATCH_SIZE.max(num_left_values));
+
+        let old_bitmap =
+            std::mem::replace(&mut self.builder, new_bitmap_builder).finish();
+        let old_bitmap = Bitmap::from(old_bitmap);
+
+        // Carry the bits that were not split off over to the new builder.
+        for i in len..old_len {
+            self.builder.append(old_bitmap.is_set(i));
+        }
+
+        old_bitmap
+    }
+
+    pub fn valid_position_iter(
+        &self,
+        range: Range<usize>,
+    ) -> impl Iterator<Item = usize> + '_ {
+        let max_def_level = self.max_level;
+        let slice = self.buffer.as_slice();
+        range.rev().filter(move |x| slice[*x] == max_def_level)

Review comment:
       It might be more efficient to calculate a boolean array for the null
bitmap using `arrow::compute::eq_scalar`, as is done in `ArrowArrayReader` here:
https://github.com/apache/arrow-rs/blob/master/parquet/src/arrow/arrow_array_reader.rs#L570
This is because `eq_scalar` can use SIMD (if enabled).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to