mzabaluev commented on code in PR #19496:
URL: https://github.com/apache/datafusion/pull/19496#discussion_r2661969517
##########
datafusion/functions-window/src/nth_value.rs:
##########
@@ -519,6 +467,87 @@ impl PartitionEvaluator for NthValueEvaluator {
}
}
+impl NthValueEvaluator {
+ fn valid_index(&self, array: &ArrayRef, range: &Range<usize>) ->
Option<usize> {
+ let n_range = range.end - range.start;
+ if self.ignore_nulls {
+ // Calculate valid indices, inside the window frame boundaries.
+ let slice = array.slice(range.start, n_range);
+ if let Some(nulls) = slice.nulls() {
+ return match self.state.kind {
+ NthValueKind::First => {
+ nulls.valid_indices().next().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Last => {
+ nulls.valid_indices().last().map(|idx| idx +
range.start)
+ }
+ NthValueKind::Nth => {
+ match self.n.cmp(&0) {
+ Ordering::Greater => {
+ // SQL indices are not 0-based.
+ let index = (self.n as usize) - 1;
+ nulls
+ .valid_indices()
+ .nth(index)
+ .map(|idx| idx + range.start)
+ }
+ Ordering::Less => {
+ let reverse_index = (-self.n) as usize;
+ if n_range < reverse_index {
+ // Outside the range, return NULL to avoid
allocating
+ // for the sliding window that will be
discarded in the end.
+ return None;
+ }
+ let mut window =
VecDeque::with_capacity(reverse_index);
Review Comment:
Thanks for the suggestion! With it, the implementation is faster across the
board, according to the added benchmark.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]