atefsawaed commented on code in PR #1621:
URL: https://github.com/apache/arrow-rs/pull/1621#discussion_r861667292


##########
parquet/src/column/writer.rs:
##########
@@ -1049,6 +1084,54 @@ fn has_dictionary_support(kind: Type, props: &WriterProperties) -> bool {
     }
 }
 
+/// Signed comparison of bytes arrays
+fn compare_greater_byte_array_decimals(a: &[u8], b: &[u8]) -> bool {
+    let a_length = a.len();
+    let b_length = b.len();
+
+    if a_length == 0 || b_length == 0 {
+        return a_length > 0 && b_length == 0;
+    }
+
+    let first_a: u8 = a[0];
+    let first_b: u8 = b[0];
+
+    // We can short circuit for different signed numbers or
+    // for equal length bytes arrays that have different first bytes.
+    // The equality requirement is necessary for sign extension cases.
+    // 0xFF10 should be equal to 0x10 (due to big endian sign extension).
+    if (0x80 & first_a) != (0x80 & first_b)
+        || (a_length == b_length && first_a != first_b)
+    {
+        return (first_a as i8) > (first_b as i8);
+    }
+
+    // When the lengths are unequal and the numbers are of the same
+    // sign we need to do comparison by sign extending the shorter
+    // value first, and once we get to equal sized arrays, lexicographical
+    // unsigned comparison of everything but the first byte is sufficient.
+
+    let extension: u8 = if (first_a as i8) < 0 { 0xFF } else { 0 };
+
+    if a_length != b_length {
+        let not_equal = if a_length > b_length {
+            let lead_length = a_length - b_length;
+            Some((&a[0..lead_length]).iter().any(|&x| x != extension))
+        } else {
+            let lead_length = b_length - a_length;
+            Some((&b[0..lead_length]).iter().any(|&x| x != extension))
+        };
+
+        if not_equal.unwrap() {
+            let negative_values: bool = (first_a as i8) < 0;
+            let a_longer: bool = a_length > b_length;
+            return if negative_values { !a_longer } else { a_longer };
+        }
+    }
+
+    (a[1..]) > (b[1..])

Review Comment:
   No, in this case the lexicographical unsigned comparison of both buffers is 
sufficient.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to