andygrove commented on a change in pull request #8171:
URL: https://github.com/apache/arrow/pull/8171#discussion_r487425717



##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -19,9 +19,42 @@
 
 use std::ops::Add;
 
-use crate::array::{Array, PrimitiveArray};
+use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray};
 use crate::datatypes::ArrowNumericType;
 
+/// Helper macro to perform min/max of strings
+macro_rules! min_max_string_helper {
+    ($array:expr, $cmp:tt) => {{
+        let null_count = $array.null_count();
+
+        if null_count == $array.len() {
+            return None
+        }
+        let mut n = "";
+        let mut has_value = false;
+        let data = $array.data();
+
+        if null_count == 0 {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || (&n $cmp &item) {
+                    has_value = true;
+                    n = item;
+                }
+            }
+        } else {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || data.is_valid(i) && (&n $cmp &item) {

Review comment:
       Is this correct? `&&` binds more tightly than `||`, so I read this 
as `if !has_value || (data.is_valid(i) && (&n $cmp &item))`. Since 
`!has_value` is true on the first iteration of the loop, the branch is taken 
even if the first element is null, so it will always set `has_value = true`?
   
   

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -149,4 +202,18 @@ mod tests {
         assert_eq!(5, min(&a).unwrap());
         assert_eq!(9, max(&a).unwrap());
     }
+
+    #[test]
+    fn test_string_min_max_with_nulls() {
+        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), 
Some("c")]);
+        assert_eq!("a", min_string(&a).unwrap());
+        assert_eq!("c", max_string(&a).unwrap());
+    }
+
+    #[test]
+    fn test_string_min_max_all_nulls() {

Review comment:
       Could you add another test where the array starts with one or more 
`None` values, followed by one or more `Some` values, to check whether my 
earlier comment is correct?

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -19,9 +19,42 @@
 
 use std::ops::Add;
 
-use crate::array::{Array, PrimitiveArray};
+use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray};
 use crate::datatypes::ArrowNumericType;
 
+/// Helper macro to perform min/max of strings
+macro_rules! min_max_string_helper {
+    ($array:expr, $cmp:tt) => {{
+        let null_count = $array.null_count();
+
+        if null_count == $array.len() {
+            return None
+        }
+        let mut n = "";
+        let mut has_value = false;
+        let data = $array.data();
+
+        if null_count == 0 {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || (&n $cmp &item) {
+                    has_value = true;
+                    n = item;
+                }
+            }
+        } else {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || data.is_valid(i) && (&n $cmp &item) {

Review comment:
       Is this correct? `&&` binds more tightly than `||`, so I read this 
as `if !has_value || (data.is_valid(i) && (&n $cmp &item))`. Since 
`!has_value` is true on the first iteration of the loop, the branch is taken 
even if the first element is null, so it will always set `has_value = true`?
   
   

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -149,4 +202,18 @@ mod tests {
         assert_eq!(5, min(&a).unwrap());
         assert_eq!(9, max(&a).unwrap());
     }
+
+    #[test]
+    fn test_string_min_max_with_nulls() {
+        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), 
Some("c")]);
+        assert_eq!("a", min_string(&a).unwrap());
+        assert_eq!("c", max_string(&a).unwrap());
+    }
+
+    #[test]
+    fn test_string_min_max_all_nulls() {

Review comment:
       Could you add another test where the array starts with one or more 
`None` values, followed by one or more `Some` values, to check whether my 
earlier comment is correct?

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -19,9 +19,42 @@
 
 use std::ops::Add;
 
-use crate::array::{Array, PrimitiveArray};
+use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray};
 use crate::datatypes::ArrowNumericType;
 
+/// Helper macro to perform min/max of strings
+macro_rules! min_max_string_helper {
+    ($array:expr, $cmp:tt) => {{
+        let null_count = $array.null_count();
+
+        if null_count == $array.len() {
+            return None
+        }
+        let mut n = "";
+        let mut has_value = false;
+        let data = $array.data();
+
+        if null_count == 0 {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || (&n $cmp &item) {
+                    has_value = true;
+                    n = item;
+                }
+            }
+        } else {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || data.is_valid(i) && (&n $cmp &item) {

Review comment:
       Is this correct? `&&` binds more tightly than `||`, so I read this 
as `if !has_value || (data.is_valid(i) && (&n $cmp &item))`. Since 
`!has_value` is true on the first iteration of the loop, the branch is taken 
even if the first element is null, so it will always set `has_value = true`?
   
   

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -149,4 +202,18 @@ mod tests {
         assert_eq!(5, min(&a).unwrap());
         assert_eq!(9, max(&a).unwrap());
     }
+
+    #[test]
+    fn test_string_min_max_with_nulls() {
+        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), 
Some("c")]);
+        assert_eq!("a", min_string(&a).unwrap());
+        assert_eq!("c", max_string(&a).unwrap());
+    }
+
+    #[test]
+    fn test_string_min_max_all_nulls() {

Review comment:
       Could you add another test where the array starts with one or more 
`None` values, followed by one or more `Some` values, to check whether my 
earlier comment is correct?

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -19,9 +19,42 @@
 
 use std::ops::Add;
 
-use crate::array::{Array, PrimitiveArray};
+use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray};
 use crate::datatypes::ArrowNumericType;
 
+/// Helper macro to perform min/max of strings
+macro_rules! min_max_string_helper {
+    ($array:expr, $cmp:tt) => {{
+        let null_count = $array.null_count();
+
+        if null_count == $array.len() {
+            return None
+        }
+        let mut n = "";
+        let mut has_value = false;
+        let data = $array.data();
+
+        if null_count == 0 {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || (&n $cmp &item) {
+                    has_value = true;
+                    n = item;
+                }
+            }
+        } else {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || data.is_valid(i) && (&n $cmp &item) {

Review comment:
       Is this correct? `&&` binds more tightly than `||`, so I read this 
as `if !has_value || (data.is_valid(i) && (&n $cmp &item))`. Since 
`!has_value` is true on the first iteration of the loop, the branch is taken 
even if the first element is null, so it will always set `has_value = true`?
   
   

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -149,4 +202,18 @@ mod tests {
         assert_eq!(5, min(&a).unwrap());
         assert_eq!(9, max(&a).unwrap());
     }
+
+    #[test]
+    fn test_string_min_max_with_nulls() {
+        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), 
Some("c")]);
+        assert_eq!("a", min_string(&a).unwrap());
+        assert_eq!("c", max_string(&a).unwrap());
+    }
+
+    #[test]
+    fn test_string_min_max_all_nulls() {

Review comment:
       Could you add another test where the array starts with one or more 
`None` values, followed by one or more `Some` values, to check whether my 
earlier comment is correct?

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -19,9 +19,42 @@
 
 use std::ops::Add;
 
-use crate::array::{Array, PrimitiveArray};
+use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray};
 use crate::datatypes::ArrowNumericType;
 
+/// Helper macro to perform min/max of strings
+macro_rules! min_max_string_helper {
+    ($array:expr, $cmp:tt) => {{
+        let null_count = $array.null_count();
+
+        if null_count == $array.len() {
+            return None
+        }
+        let mut n = "";
+        let mut has_value = false;
+        let data = $array.data();
+
+        if null_count == 0 {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || (&n $cmp &item) {
+                    has_value = true;
+                    n = item;
+                }
+            }
+        } else {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || data.is_valid(i) && (&n $cmp &item) {

Review comment:
       Is this correct? `&&` binds more tightly than `||`, so I read this 
as `if !has_value || (data.is_valid(i) && (&n $cmp &item))`. Since 
`!has_value` is true on the first iteration of the loop, the branch is taken 
even if the first element is null, so it will always set `has_value = true`?
   
   

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -149,4 +202,18 @@ mod tests {
         assert_eq!(5, min(&a).unwrap());
         assert_eq!(9, max(&a).unwrap());
     }
+
+    #[test]
+    fn test_string_min_max_with_nulls() {
+        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), 
Some("c")]);
+        assert_eq!("a", min_string(&a).unwrap());
+        assert_eq!("c", max_string(&a).unwrap());
+    }
+
+    #[test]
+    fn test_string_min_max_all_nulls() {

Review comment:
       Could you add another test where the array starts with one or more 
`None` values, followed by one or more `Some` values, to check whether my 
earlier comment is correct?

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -19,9 +19,42 @@
 
 use std::ops::Add;
 
-use crate::array::{Array, PrimitiveArray};
+use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray};
 use crate::datatypes::ArrowNumericType;
 
+/// Helper macro to perform min/max of strings
+macro_rules! min_max_string_helper {
+    ($array:expr, $cmp:tt) => {{
+        let null_count = $array.null_count();
+
+        if null_count == $array.len() {
+            return None
+        }
+        let mut n = "";
+        let mut has_value = false;
+        let data = $array.data();
+
+        if null_count == 0 {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || (&n $cmp &item) {
+                    has_value = true;
+                    n = item;
+                }
+            }
+        } else {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || data.is_valid(i) && (&n $cmp &item) {

Review comment:
       Is this correct? `&&` binds more tightly than `||`, so I read this 
as `if !has_value || (data.is_valid(i) && (&n $cmp &item))`. Since 
`!has_value` is true on the first iteration of the loop, the branch is taken 
even if the first element is null, so it will always set `has_value = true`?
   
   

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -149,4 +202,18 @@ mod tests {
         assert_eq!(5, min(&a).unwrap());
         assert_eq!(9, max(&a).unwrap());
     }
+
+    #[test]
+    fn test_string_min_max_with_nulls() {
+        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), 
Some("c")]);
+        assert_eq!("a", min_string(&a).unwrap());
+        assert_eq!("c", max_string(&a).unwrap());
+    }
+
+    #[test]
+    fn test_string_min_max_all_nulls() {

Review comment:
       Could you add another test where the array starts with one or more 
`None` values, followed by one or more `Some` values, to check whether my 
earlier comment is correct?

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -19,9 +19,42 @@
 
 use std::ops::Add;
 
-use crate::array::{Array, PrimitiveArray};
+use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray};
 use crate::datatypes::ArrowNumericType;
 
+/// Helper macro to perform min/max of strings
+macro_rules! min_max_string_helper {
+    ($array:expr, $cmp:tt) => {{
+        let null_count = $array.null_count();
+
+        if null_count == $array.len() {
+            return None
+        }
+        let mut n = "";
+        let mut has_value = false;
+        let data = $array.data();
+
+        if null_count == 0 {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || (&n $cmp &item) {
+                    has_value = true;
+                    n = item;
+                }
+            }
+        } else {
+            for i in 0..data.len() {
+                let item = $array.value(i);
+                if !has_value || data.is_valid(i) && (&n $cmp &item) {

Review comment:
       Is this correct? `&&` binds more tightly than `||`, so I read this 
as `if !has_value || (data.is_valid(i) && (&n $cmp &item))`. Since 
`!has_value` is true on the first iteration of the loop, the branch is taken 
even if the first element is null, so it will always set `has_value = true`?
   
   

##########
File path: rust/arrow/src/compute/kernels/aggregate.rs
##########
@@ -149,4 +202,18 @@ mod tests {
         assert_eq!(5, min(&a).unwrap());
         assert_eq!(9, max(&a).unwrap());
     }
+
+    #[test]
+    fn test_string_min_max_with_nulls() {
+        let a = StringArray::from(vec![Some("b"), None, None, Some("a"), 
Some("c")]);
+        assert_eq!("a", min_string(&a).unwrap());
+        assert_eq!("c", max_string(&a).unwrap());
+    }
+
+    #[test]
+    fn test_string_min_max_all_nulls() {

Review comment:
       Could you add another test where the array starts with one or more 
`None` values, followed by one or more `Some` values, to check whether my 
earlier comment is correct?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to