Dandandan commented on code in PR #9300:
URL: https://github.com/apache/arrow-rs/pull/9300#discussion_r2744971490


##########
arrow-ord/src/sort.rs:
##########
@@ -338,90 +340,96 @@ fn sort_primitive<T: ArrowPrimitiveType>(
     sort_impl(options, &mut valids, &nulls, limit, T::Native::compare).into()
 }
 
+struct FixedSizeBinaryAccessor<'a> {
+    values: &'a FixedSizeBinaryArray,
+    indices: &'a [u32],
+}
+
+impl<'a> KeyAccessor for FixedSizeBinaryAccessor<'a> {
+    #[inline(always)]
+    fn get_key(&self, index: usize) -> &[u8] {
+        unsafe {
+            let idx = *self.indices.get_unchecked(index);
+            self.values.value_unchecked(idx as usize)
+        }
+    }
+
+    #[inline(always)]
+    fn len(&self) -> usize {
+        self.indices.len()
+    }
+}
+
 fn sort_bytes<T: ByteArrayType>(
     values: &GenericByteArray<T>,
     value_indices: Vec<u32>,
     nulls: Vec<u32>,
     options: SortOptions,
     limit: Option<usize>,
 ) -> UInt32Array {
-    // Note: Why do we use 4‑byte prefix?
-    // Compute the 4‑byte prefix in BE order, or left‑pad if shorter.
-    // Most byte‐sequences differ in their first few bytes, so by
-    // comparing up to 4 bytes as a single u32 we avoid the overhead
-    // of a full lexicographical compare for the vast majority of cases.
-
-    // 1. Build a vector of (index, prefix, length) tuples
-    let mut valids: Vec<(u32, u32, u64)> = value_indices
-        .into_iter()
-        .map(|idx| unsafe {
+    // Build (index, 8-byte prefix) tuples for prefix-accelerated comparison sort

Review Comment:
   What is the improvement without `orasort` on this?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to