This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d4e2f2cea Implement min max support for string/binary view types (#6053)
6d4e2f2cea is described below

commit 6d4e2f2ceaf423031b0bc72f54c547dd77a0ddbb
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Mon Jul 15 12:37:39 2024 -0400

    Implement min max support for string/binary view types (#6053)
    
    * add
    
    * implement min max support for string/binary view
    
    * update tests
---
 arrow-arith/src/aggregate.rs | 188 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 142 insertions(+), 46 deletions(-)

diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index 190685ff9d..0e4d31eee7 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -415,21 +415,41 @@ pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&
     min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
 }
 
+/// Returns the maximum value in the binary view array, according to the natural order.
+pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
+    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
+}
+
 /// Returns the minimum value in the binary array, according to the natural order.
 pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
     min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
 }
 
+/// Returns the minimum value in the binary view array, according to the natural order.
+pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
+    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
+}
+
 /// Returns the maximum value in the string array, according to the natural order.
 pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
     min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
 }
 
+/// Returns the maximum value in the string view array, according to the natural order.
+pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
+    min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
+}
+
 /// Returns the minimum value in the string array, according to the natural order.
 pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
     min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
 }
 
+/// Returns the minimum value in the string view array, according to the natural order.
+pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
+    min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
+}
+
 /// Returns the sum of values in the array.
 ///
 /// This doesn't detect overflow. Once overflowing, the result will wrap around.
@@ -1132,61 +1152,137 @@ mod tests {
         assert!(max(&a).unwrap().is_nan());
     }
 
-    #[test]
-    fn test_binary_min_max_with_nulls() {
-        let a = BinaryArray::from(vec![
-            Some("b".as_bytes()),
+    macro_rules! test_binary {
+        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
+            #[test]
+            fn $NAME() {
+                let binary = BinaryArray::from($ARRAY);
+                assert_eq!($EXPECTED_MIN, min_binary(&binary));
+                assert_eq!($EXPECTED_MAX, max_binary(&binary));
+
+                let large_binary = LargeBinaryArray::from($ARRAY);
+                assert_eq!($EXPECTED_MIN, min_binary(&large_binary));
+                assert_eq!($EXPECTED_MAX, max_binary(&large_binary));
+
+                let binary_view = BinaryViewArray::from($ARRAY);
+                assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view));
+                assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view));
+            }
+        };
+    }
+
+    test_binary!(
+        test_binary_min_max_with_nulls,
+        vec![
+            Some("b01234567890123".as_bytes()), // long bytes
             None,
             None,
             Some(b"a"),
             Some(b"c"),
-        ]);
-        assert_eq!(Some("a".as_bytes()), min_binary(&a));
-        assert_eq!(Some("c".as_bytes()), max_binary(&a));
-    }
-
-    #[test]
-    fn test_binary_min_max_no_null() {
-        let a = BinaryArray::from(vec![Some("b".as_bytes()), Some(b"a"), Some(b"c")]);
-        assert_eq!(Some("a".as_bytes()), min_binary(&a));
-        assert_eq!(Some("c".as_bytes()), max_binary(&a));
-    }
+            Some(b"abcdedfg0123456"),
+        ],
+        Some("a".as_bytes()),
+        Some("c".as_bytes())
+    );
+
+    test_binary!(
+        test_binary_min_max_no_null,
+        vec![
+            Some("b".as_bytes()),
+            Some(b"abcdefghijklmnopqrst"), // long bytes
+            Some(b"c"),
+            Some(b"b01234567890123"), // long bytes for view types
+        ],
+        Some("abcdefghijklmnopqrst".as_bytes()),
+        Some("c".as_bytes())
+    );
 
-    #[test]
-    fn test_binary_min_max_all_nulls() {
-        let a = BinaryArray::from(vec![None, None]);
-        assert_eq!(None, min_binary(&a));
-        assert_eq!(None, max_binary(&a));
-    }
+    test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
 
-    #[test]
-    fn test_binary_min_max_1() {
-        let a = BinaryArray::from(vec![None, None, Some("b".as_bytes()), Some(b"a")]);
-        assert_eq!(Some("a".as_bytes()), min_binary(&a));
-        assert_eq!(Some("b".as_bytes()), max_binary(&a));
-    }
-
-    #[test]
-    fn test_string_min_max_with_nulls() {
-        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), Some("c")]);
-        assert_eq!(Some("a"), min_string(&a));
-        assert_eq!(Some("c"), max_string(&a));
+    test_binary!(
+        test_binary_min_max_1,
+        vec![
+            None,
+            Some("b01234567890123435".as_bytes()), // long bytes for view types
+            None,
+            Some(b"b0123xxxxxxxxxxx"),
+            Some(b"a")
+        ],
+        Some("a".as_bytes()),
+        Some("b0123xxxxxxxxxxx".as_bytes())
+    );
+
+    macro_rules! test_string {
+        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
+            #[test]
+            fn $NAME() {
+                let string = StringArray::from($ARRAY);
+                assert_eq!($EXPECTED_MIN, min_string(&string));
+                assert_eq!($EXPECTED_MAX, max_string(&string));
+
+                let large_string = LargeStringArray::from($ARRAY);
+                assert_eq!($EXPECTED_MIN, min_string(&large_string));
+                assert_eq!($EXPECTED_MAX, max_string(&large_string));
+
+                let string_view = StringViewArray::from($ARRAY);
+                assert_eq!($EXPECTED_MIN, min_string_view(&string_view));
+                assert_eq!($EXPECTED_MAX, max_string_view(&string_view));
+            }
+        };
     }
 
-    #[test]
-    fn test_string_min_max_all_nulls() {
-        let v: Vec<Option<&str>> = vec![None, None];
-        let a = StringArray::from(v);
-        assert_eq!(None, min_string(&a));
-        assert_eq!(None, max_string(&a));
-    }
+    test_string!(
+        test_string_min_max_with_nulls,
+        vec![
+            Some("b012345678901234"), // long bytes for view types
+            None,
+            None,
+            Some("a"),
+            Some("c"),
+            Some("b0123xxxxxxxxxxx")
+        ],
+        Some("a"),
+        Some("c")
+    );
+
+    test_string!(
+        test_string_min_max_no_null,
+        vec![
+            Some("b"),
+            Some("b012345678901234"), // long bytes for view types
+            Some("a"),
+            Some("b012xxxxxxxxxxxx")
+        ],
+        Some("a"),
+        Some("b012xxxxxxxxxxxx")
+    );
+
+    test_string!(
+        test_string_min_max_all_nulls,
+        Vec::<Option<&str>>::from_iter([None, None]),
+        None,
+        None
+    );
 
-    #[test]
-    fn test_string_min_max_1() {
-        let a = StringArray::from(vec![None, None, Some("b"), Some("a")]);
-        assert_eq!(Some("a"), min_string(&a));
-        assert_eq!(Some("b"), max_string(&a));
-    }
+    test_string!(
+        test_string_min_max_1,
+        vec![
+            None,
+            Some("c12345678901234"), // long bytes for view types
+            None,
+            Some("b"),
+            Some("c1234xxxxxxxxxx")
+        ],
+        Some("b"),
+        Some("c1234xxxxxxxxxx")
+    );
+
+    test_string!(
+        test_string_min_max_empty,
+        Vec::<Option<&str>>::new(),
+        None,
+        None
+    );
 
     #[test]
     fn test_boolean_min_max_empty() {

Reply via email to