mustafasrepo commented on code in PR #6621: URL: https://github.com/apache/arrow-datafusion/pull/6621#discussion_r1226193242
########## datafusion/physical-expr/src/window/nth_value.rs: ########## @@ -169,9 +164,35 @@ impl PartitionEvaluator for NthValueEvaluator { Ok(()) } - fn set_state(&mut self, state: &BuiltinWindowState) -> Result<()> { - if let BuiltinWindowState::NthValue(nth_value_state) = state { - self.state = nth_value_state.clone() + /// When the window frame has a fixed beginning (e.g UNBOUNDED + /// PRECEDING), some functions such as FIRST_VALUE, LAST_VALUE and + /// NTH_VALUE we can memoize result. Once result is calculated it + /// will always stay same. Hence, we do not need to keep past data + /// as we process the entire dataset. This feature enables us to + /// prune rows from table. The default implementation does nothing + fn memoize(&mut self, state: &mut WindowAggState) -> Result<()> { + let out = &state.out_col; + let size = out.len(); + let (is_prunable, new_prunable) = match self.state.kind { + NthValueKind::First => { + let n_range = + state.window_frame_range.end - state.window_frame_range.start; + (n_range > 0 && size > 0, true) + } + NthValueKind::Last => (true, false), + NthValueKind::Nth(n) => { + let n_range = + state.window_frame_range.end - state.window_frame_range.start; + (n_range >= (n as usize) && size >= (n as usize), true) + } + }; + if is_prunable { + if self.state.finalized_result.is_none() && new_prunable { Review Comment: If we use the `is_last` flag, the condition should be `if self.state.finalized_result.is_none() && !is_last {` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org