alamb commented on code in PR #4989:
URL: https://github.com/apache/arrow-datafusion/pull/4989#discussion_r1083023324
##########
datafusion/common/src/utils.rs:
##########
@@ -103,6 +111,53 @@ where
Ok(low)
}
+/// This function searches for a tuple of given values (`target`) among the given
+/// rows (`item_columns`) via a linear scan. It assumes that `item_columns` is sorted
+/// according to `sort_options` and returns the insertion index of `target`.
+/// Template argument `SIDE` being `true`/`false` means left/right insertion.
+pub fn linear_search<const SIDE: bool>(
+ item_columns: &[ArrayRef],
+ target: &[ScalarValue],
+ sort_options: &[SortOptions],
+) -> Result<usize> {
+ let low: usize = 0;
+ let high: usize = item_columns
+ .get(0)
+ .ok_or_else(|| {
+ DataFusionError::Internal("Column array shouldn't be empty".to_string())
+ })?
+ .len();
+ let compare_fn = |current: &[ScalarValue], target: &[ScalarValue]| {
+ let cmp = compare_rows(current, target, sort_options)?;
+ Ok(if SIDE { cmp.is_lt() } else { cmp.is_le() })
+ };
+ search_in_slice(item_columns, target, compare_fn, low, high)
+}
+
+/// This function searches for a tuple of given values (`target`) among a slice of
+/// the given rows (`item_columns`) via a linear scan. The slice starts at the index
+/// `low` and ends at the index `high`. The boolean-valued function `compare_fn`
+/// specifies the stopping criterion.
+pub fn search_in_slice<F>(
+ item_columns: &[ArrayRef],
+ target: &[ScalarValue],
+ compare_fn: F,
+ mut low: usize,
+ high: usize,
+) -> Result<usize>
+where
+ F: Fn(&[ScalarValue], &[ScalarValue]) -> Result<bool>,
+{
+ while low < high {
Review Comment:
I was wondering whether we could get an iterator over `item_columns` somehow
(and thus avoid all the bounds checks) -- I realize this is not really easy
with multiple arrays. 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]