Jefffrey commented on code in PR #19496:
URL: https://github.com/apache/datafusion/pull/19496#discussion_r2658779933
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -370,6 +371,33 @@ impl PartitionEvaluator for NthValueEvaluator {
fn memoize(&mut self, state: &mut WindowAggState) -> Result<()> {
let out = &state.out_col;
let size = out.len();
+ if self.ignore_nulls {
+ match self.state.kind {
+ // Prune on first non-null output in case of FIRST_VALUE
+ NthValueKind::First => {
+ if let Some(nulls) = out.nulls() {
+ if self.state.finalized_result.is_none() {
+ if let Some(valid_index) =
nulls.valid_indices().next() {
+ let result =
+ ScalarValue::try_from_array(out,
valid_index)?;
+ self.state.finalized_result = Some(result);
+ } else {
+ // The output is empty or all nulls, ignore
+ }
+ }
+ if state.window_frame_range.start <
state.window_frame_range.end {
+ state.window_frame_range.start =
+ state.window_frame_range.end - 1;
+ }
+ return Ok(());
+ } else {
+ // Fall through to the main case because there are no
nulls
+ }
+ }
+ // Do not memoize for other kinds when nulls are ignored
Review Comment:
The logic here is really hard to follow with all the nesting present
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
+ // Calculate valid indices, inside the window frame boundaries.
+ let slice = array.slice(range.start, n_range);
+ if let Some(nulls) = slice.nulls() {
+ return match self.state.kind {
+ NthValueKind::First => {
+ nulls.valid_indices().next().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Last => {
+ nulls.valid_indices().last().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Nth => {
+ match self.n.cmp(&0) {
+ Ordering::Greater => {
+ // SQL indices are not 0-based.
+ let index = (self.n as usize) - 1;
+ nulls
+ .valid_indices()
+ .nth(index)
+ .map(|idx| idx + range.start)
+ }
Review Comment:
I notice we now omit the early check of being out of range?
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
Review Comment:
This whole null logic block is quite indented; would be good to see if we
can refactor it
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
+ // Calculate valid indices, inside the window frame boundaries.
+ let slice = array.slice(range.start, n_range);
+ if let Some(nulls) = slice.nulls() {
+ return match self.state.kind {
+ NthValueKind::First => {
+ nulls.valid_indices().next().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Last => {
+ nulls.valid_indices().last().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Nth => {
+ match self.n.cmp(&0) {
+ Ordering::Greater => {
+ // SQL indices are not 0-based.
+ let index = (self.n as usize) - 1;
+ nulls
+ .valid_indices()
+ .nth(index)
+ .map(|idx| idx + range.start)
+ }
+ Ordering::Less => {
+ let reverse_index = (-self.n) as usize;
+ if n_range < reverse_index {
+ // Outside the range, return NULL to avoid
allocating
+ // for the sliding window that will be
discarded in the end.
+ return None;
+ }
+ let mut window =
VecDeque::with_capacity(reverse_index);
Review Comment:
Can we reverse the iterator of `valid_indices()` to avoid need of this queue?
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
+ // Calculate valid indices, inside the window frame boundaries.
+ let slice = array.slice(range.start, n_range);
+ if let Some(nulls) = slice.nulls() {
Review Comment:
We can also be more strict by checking if the null_count instead of simply
checking for the presence of the null buffer
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]