mzabaluev commented on code in PR #19496:
URL: https://github.com/apache/datafusion/pull/19496#discussion_r2659184727
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
+ // Calculate valid indices, inside the window frame boundaries.
+ let slice = array.slice(range.start, n_range);
+ if let Some(nulls) = slice.nulls() {
Review Comment:
That will be another branch, and the iterator returned by
`nulls.valid_indices()` being empty will do the right thing anyway?
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
+ // Calculate valid indices, inside the window frame boundaries.
+ let slice = array.slice(range.start, n_range);
+ if let Some(nulls) = slice.nulls() {
+ return match self.state.kind {
+ NthValueKind::First => {
+ nulls.valid_indices().next().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Last => {
+ nulls.valid_indices().last().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Nth => {
+ match self.n.cmp(&0) {
+ Ordering::Greater => {
+ // SQL indices are not 0-based.
+ let index = (self.n as usize) - 1;
+ nulls
+ .valid_indices()
+ .nth(index)
+ .map(|idx| idx + range.start)
+ }
Review Comment:
The nulls buffer is in a slice limited by the range, so any valid index
should be within the range (adjusted with the offset).
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
+ // Calculate valid indices, inside the window frame boundaries.
+ let slice = array.slice(range.start, n_range);
+ if let Some(nulls) = slice.nulls() {
+ return match self.state.kind {
+ NthValueKind::First => {
+ nulls.valid_indices().next().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Last => {
+ nulls.valid_indices().last().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Nth => {
+ match self.n.cmp(&0) {
+ Ordering::Greater => {
+ // SQL indices are not 0-based.
+ let index = (self.n as usize) - 1;
+ nulls
+ .valid_indices()
+ .nth(index)
+ .map(|idx| idx + range.start)
+ }
+ Ordering::Less => {
+ let reverse_index = (-self.n) as usize;
+ if n_range < reverse_index {
+ // Outside the range, return NULL to avoid
allocating
+ // for the sliding window that will be
discarded in the end.
+ return None;
+ }
+ let mut window =
VecDeque::with_capacity(reverse_index);
Review Comment:
Unfortunately, `BitIndexIterator` is not bidirectional.
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -370,6 +371,33 @@ impl PartitionEvaluator for NthValueEvaluator {
fn memoize(&mut self, state: &mut WindowAggState) -> Result<()> {
let out = &state.out_col;
let size = out.len();
+ if self.ignore_nulls {
+ match self.state.kind {
+ // Prune on first non-null output in case of FIRST_VALUE
+ NthValueKind::First => {
+ if let Some(nulls) = out.nulls() {
+ if self.state.finalized_result.is_none() {
+ if let Some(valid_index) =
nulls.valid_indices().next() {
+ let result =
+ ScalarValue::try_from_array(out,
valid_index)?;
+ self.state.finalized_result = Some(result);
+ } else {
+ // The output is empty or all nulls, ignore
+ }
+ }
+ if state.window_frame_range.start <
state.window_frame_range.end {
+ state.window_frame_range.start =
+ state.window_frame_range.end - 1;
+ }
+ return Ok(());
+ } else {
+ // Fall through to the main case because there are no
nulls
+ }
+ }
+ // Do not memoize for other kinds when nulls are ignored
Review Comment:
Between early returns and falling through to the main case, I don't know if
I can make it more readable.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]