This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 6d4e2f2cea Implement min max support for string/binary view types (#6053)
6d4e2f2cea is described below
commit 6d4e2f2ceaf423031b0bc72f54c547dd77a0ddbb
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Mon Jul 15 12:37:39 2024 -0400
Implement min max support for string/binary view types (#6053)
* add
* implement min max support for string/binary view
* update tests
---
arrow-arith/src/aggregate.rs | 188 ++++++++++++++++++++++++++++++++-----------
1 file changed, 142 insertions(+), 46 deletions(-)
diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index 190685ff9d..0e4d31eee7 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -415,21 +415,41 @@ pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&
min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
}
+/// Returns the maximum value in the binary view array, according to the natural order.
+pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
+ min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
+}
+
/// Returns the minimum value in the binary array, according to the natural order.
pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) ->
Option<&[u8]> {
min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
}
+/// Returns the minimum value in the binary view array, according to the natural order.
+pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
+ min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
+}
+
/// Returns the maximum value in the string array, according to the natural order.
pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) ->
Option<&str> {
min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
}
+/// Returns the maximum value in the string view array, according to the natural order.
+pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
+ min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
+}
+
/// Returns the minimum value in the string array, according to the natural order.
pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) ->
Option<&str> {
min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
}
+/// Returns the minimum value in the string view array, according to the natural order.
+pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
+ min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
+}
+
/// Returns the sum of values in the array.
///
/// This doesn't detect overflow. Once overflowing, the result will wrap around.
@@ -1132,61 +1152,137 @@ mod tests {
assert!(max(&a).unwrap().is_nan());
}
- #[test]
- fn test_binary_min_max_with_nulls() {
- let a = BinaryArray::from(vec![
- Some("b".as_bytes()),
+ macro_rules! test_binary {
+ ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) =>
{
+ #[test]
+ fn $NAME() {
+ let binary = BinaryArray::from($ARRAY);
+ assert_eq!($EXPECTED_MIN, min_binary(&binary));
+ assert_eq!($EXPECTED_MAX, max_binary(&binary));
+
+ let large_binary = LargeBinaryArray::from($ARRAY);
+ assert_eq!($EXPECTED_MIN, min_binary(&large_binary));
+ assert_eq!($EXPECTED_MAX, max_binary(&large_binary));
+
+ let binary_view = BinaryViewArray::from($ARRAY);
+ assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view));
+ assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view));
+ }
+ };
+ }
+
+ test_binary!(
+ test_binary_min_max_with_nulls,
+ vec![
+ Some("b01234567890123".as_bytes()), // long bytes
None,
None,
Some(b"a"),
Some(b"c"),
- ]);
- assert_eq!(Some("a".as_bytes()), min_binary(&a));
- assert_eq!(Some("c".as_bytes()), max_binary(&a));
- }
-
- #[test]
- fn test_binary_min_max_no_null() {
- let a = BinaryArray::from(vec![Some("b".as_bytes()), Some(b"a"),
Some(b"c")]);
- assert_eq!(Some("a".as_bytes()), min_binary(&a));
- assert_eq!(Some("c".as_bytes()), max_binary(&a));
- }
+ Some(b"abcdedfg0123456"),
+ ],
+ Some("a".as_bytes()),
+ Some("c".as_bytes())
+ );
+
+ test_binary!(
+ test_binary_min_max_no_null,
+ vec![
+ Some("b".as_bytes()),
+ Some(b"abcdefghijklmnopqrst"), // long bytes
+ Some(b"c"),
+ Some(b"b01234567890123"), // long bytes for view types
+ ],
+ Some("abcdefghijklmnopqrst".as_bytes()),
+ Some("c".as_bytes())
+ );
- #[test]
- fn test_binary_min_max_all_nulls() {
- let a = BinaryArray::from(vec![None, None]);
- assert_eq!(None, min_binary(&a));
- assert_eq!(None, max_binary(&a));
- }
+ test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
- #[test]
- fn test_binary_min_max_1() {
- let a = BinaryArray::from(vec![None, None, Some("b".as_bytes()),
Some(b"a")]);
- assert_eq!(Some("a".as_bytes()), min_binary(&a));
- assert_eq!(Some("b".as_bytes()), max_binary(&a));
- }
-
- #[test]
- fn test_string_min_max_with_nulls() {
- let a = StringArray::from(vec![Some("b"), None, None, Some("a"),
Some("c")]);
- assert_eq!(Some("a"), min_string(&a));
- assert_eq!(Some("c"), max_string(&a));
+ test_binary!(
+ test_binary_min_max_1,
+ vec![
+ None,
+ Some("b01234567890123435".as_bytes()), // long bytes for view types
+ None,
+ Some(b"b0123xxxxxxxxxxx"),
+ Some(b"a")
+ ],
+ Some("a".as_bytes()),
+ Some("b0123xxxxxxxxxxx".as_bytes())
+ );
+
+ macro_rules! test_string {
+ ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) =>
{
+ #[test]
+ fn $NAME() {
+ let string = StringArray::from($ARRAY);
+ assert_eq!($EXPECTED_MIN, min_string(&string));
+ assert_eq!($EXPECTED_MAX, max_string(&string));
+
+ let large_string = LargeStringArray::from($ARRAY);
+ assert_eq!($EXPECTED_MIN, min_string(&large_string));
+ assert_eq!($EXPECTED_MAX, max_string(&large_string));
+
+ let string_view = StringViewArray::from($ARRAY);
+ assert_eq!($EXPECTED_MIN, min_string_view(&string_view));
+ assert_eq!($EXPECTED_MAX, max_string_view(&string_view));
+ }
+ };
}
- #[test]
- fn test_string_min_max_all_nulls() {
- let v: Vec<Option<&str>> = vec![None, None];
- let a = StringArray::from(v);
- assert_eq!(None, min_string(&a));
- assert_eq!(None, max_string(&a));
- }
+ test_string!(
+ test_string_min_max_with_nulls,
+ vec![
+ Some("b012345678901234"), // long bytes for view types
+ None,
+ None,
+ Some("a"),
+ Some("c"),
+ Some("b0123xxxxxxxxxxx")
+ ],
+ Some("a"),
+ Some("c")
+ );
+
+ test_string!(
+ test_string_min_max_no_null,
+ vec![
+ Some("b"),
+ Some("b012345678901234"), // long bytes for view types
+ Some("a"),
+ Some("b012xxxxxxxxxxxx")
+ ],
+ Some("a"),
+ Some("b012xxxxxxxxxxxx")
+ );
+
+ test_string!(
+ test_string_min_max_all_nulls,
+ Vec::<Option<&str>>::from_iter([None, None]),
+ None,
+ None
+ );
- #[test]
- fn test_string_min_max_1() {
- let a = StringArray::from(vec![None, None, Some("b"), Some("a")]);
- assert_eq!(Some("a"), min_string(&a));
- assert_eq!(Some("b"), max_string(&a));
- }
+ test_string!(
+ test_string_min_max_1,
+ vec![
+ None,
+ Some("c12345678901234"), // long bytes for view types
+ None,
+ Some("b"),
+ Some("c1234xxxxxxxxxx")
+ ],
+ Some("b"),
+ Some("c1234xxxxxxxxxx")
+ );
+
+ test_string!(
+ test_string_min_max_empty,
+ Vec::<Option<&str>>::new(),
+ None,
+ None
+ );
#[test]
fn test_boolean_min_max_empty() {