This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 52ad7d703a Perf: Make sort string view fast(1.5X ~ 3X faster) (#7792)
52ad7d703a is described below
commit 52ad7d703acff0e5b4c143c179df206a842bf24e
Author: Qi Zhu <[email protected]>
AuthorDate: Wed Jul 2 03:53:37 2025 +0800
Perf: Make sort string view fast(1.5X ~ 3X faster) (#7792)
# Which issue does this PR close?
This is a follow-up for https://github.com/apache/arrow-rs/pull/7748
In theory we can implement a custom string-view comparison and make it significantly faster.
- Closes [#7790](https://github.com/apache/arrow-rs/issues/7790)
# Rationale for this change
In theory we can implement a custom string-view comparison and make it significantly faster.
# What changes are included in this PR?
In theory we can implement a custom string-view comparison and make it significantly faster.
# Are these changes tested?
Yes
# Are there any user-facing changes?
No
---
arrow-ord/src/sort.rs | 71 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 66 insertions(+), 5 deletions(-)
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index 00606cc6e6..ef63a7e7cb 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -24,7 +24,7 @@ use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_buffer::BooleanBufferBuilder;
-use arrow_data::ArrayDataBuilder;
+use arrow_data::{ArrayDataBuilder, ByteView, MAX_INLINE_VIEW_LEN};
use arrow_schema::{ArrowError, DataType};
use arrow_select::take::take;
use std::cmp::Ordering;
@@ -310,11 +310,72 @@ fn sort_byte_view<T: ByteViewType>(
options: SortOptions,
limit: Option<usize>,
) -> UInt32Array {
- let mut valids = value_indices
+ // 1. Build a list of (index, raw_view, length)
+ let mut valids: Vec<_> = value_indices
.into_iter()
- .map(|index| (index, values.value(index as usize).as_ref()))
- .collect::<Vec<(u32, &[u8])>>();
- sort_impl(options, &mut valids, &nulls, limit, Ord::cmp).into()
+ .map(|idx| {
+ // SAFETY: we know idx < values.len()
+ let raw = unsafe { *values.views().get_unchecked(idx as usize) };
+ let len = raw as u32; // lower 32 bits encode length
+ (idx, raw, len)
+ })
+ .collect();
+
+ // 2. Compute the number of non-null entries to partially sort
+ let vlimit = match (limit, options.nulls_first) {
+ (Some(l), true) => l.saturating_sub(nulls.len()).min(valids.len()),
+ _ => valids.len(),
+ };
+
+ // 3. Mixed comparator: first prefix, then inline vs full comparison
+ let cmp_mixed = |a: &(u32, u128, u32), b: &(u32, u128, u32)| {
+ let (_, raw_a, len_a) = *a;
+ let (_, raw_b, len_b) = *b;
+
+        // 3.1 Both inline (≤12 bytes): compare full 128-bit key including length
+ if len_a <= MAX_INLINE_VIEW_LEN && len_b <= MAX_INLINE_VIEW_LEN {
+ return GenericByteViewArray::<T>::inline_key_fast(raw_a)
+ .cmp(&GenericByteViewArray::<T>::inline_key_fast(raw_b));
+ }
+
+ // 3.2 Compare 4-byte prefix in big-endian order
+ let pref_a = ByteView::from(raw_a).prefix.swap_bytes();
+ let pref_b = ByteView::from(raw_b).prefix.swap_bytes();
+ if pref_a != pref_b {
+ return pref_a.cmp(&pref_b);
+ }
+
+ // 3.3 Fallback to full byte-slice comparison
+        let full_a: &[u8] = unsafe { values.value_unchecked(a.0 as usize).as_ref() };
+        let full_b: &[u8] = unsafe { values.value_unchecked(b.0 as usize).as_ref() };
+ full_a.cmp(full_b)
+ };
+
+ // 4. Partially sort according to ascending/descending
+ if !options.descending {
+ sort_unstable_by(&mut valids, vlimit, cmp_mixed);
+ } else {
+        sort_unstable_by(&mut valids, vlimit, |x, y| cmp_mixed(x, y).reverse());
+ }
+
+ // 5. Assemble nulls and sorted indices into final output
+ let total = valids.len() + nulls.len();
+ let out_limit = limit.unwrap_or(total).min(total);
+ let mut out = Vec::with_capacity(total);
+
+ if options.nulls_first {
+ // Place null indices first
+ out.extend_from_slice(&nulls[..nulls.len().min(out_limit)]);
+ let rem = out_limit - out.len();
+ out.extend(valids.iter().map(|&(i, _, _)| i).take(rem));
+ } else {
+ // Place non-null indices first
+ out.extend(valids.iter().map(|&(i, _, _)| i).take(out_limit));
+ let rem = out_limit - out.len();
+ out.extend_from_slice(&nulls[..rem]);
+ }
+
+ out.into()
}
fn sort_fixed_size_binary(