This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 350ea26c6 Support `Utf8View` for `bit_length` kernel (#6671)
350ea26c6 is described below

commit 350ea26c6cc646baefb72e39e138d10f9261f71e
Author: Austin Liu <[email protected]>
AuthorDate: Wed Nov 6 00:13:19 2024 +0800

    Support `Utf8View` for `bit_length` kernel (#6671)
    
    * Support `Utf8View` for string function `bit_length()`
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Add test & handle view bytes length counting
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Add test & handle view bytes length counting
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Refine `string_view_array`
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Make length from `i32` to `u32` & check nullity
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Clean up
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Refine
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Use `from_unary` instead
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Avoid inspecting the string data
    
    Signed-off-by: Austin Liu <[email protected]>
    
    * Clean up
    
    Signed-off-by: Austin Liu <[email protected]>
    
    ---------
    
    Signed-off-by: Austin Liu <[email protected]>
---
 arrow-string/src/length.rs | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/arrow-string/src/length.rs b/arrow-string/src/length.rs
index 97f876a9f..6a28d44ea 100644
--- a/arrow-string/src/length.rs
+++ b/arrow-string/src/length.rs
@@ -137,6 +137,15 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, ArrowError> {
             let list = array.as_string::<i64>();
             Ok(bit_length_impl::<Int64Type>(list.offsets(), list.nulls()))
         }
+        DataType::Utf8View => {
+            let list = array.as_string_view();
+            let values = list
+                .views()
+                .iter()
+                .map(|view| (*view as i32).wrapping_mul(8))
+                .collect();
+            Ok(Arc::new(Int32Array::new(values, array.nulls().cloned())))
+        }
         DataType::Binary => {
             let list = array.as_binary::<i32>();
             Ok(bit_length_impl::<Int32Type>(list.offsets(), list.nulls()))
@@ -462,6 +471,35 @@ mod tests {
             })
     }
 
+    #[test]
+    fn bit_length_test_utf8view() { // bit_length over a StringViewArray must reproduce each case's expected values
+        bit_length_cases() // each case is an (input, len, expected) tuple
+            .into_iter()
+            .for_each(|(input, len, expected)| {
+                let string_array = StringViewArray::from(input);
+                let result = bit_length(&string_array).unwrap();
+                assert_eq!(len, result.len());
+                let result = result.as_any().downcast_ref::<Int32Array>().unwrap(); // the Utf8View arm builds an Int32Array
+                expected.iter().enumerate().for_each(|(i, value)| {
+                    assert_eq!(*value, result.value(i)); // element-wise comparison
+                });
+            })
+    }
+
+
+    #[test]
+    fn bit_length_null_utf8view() { // null slots of a Utf8View input must stay null in the result
+        bit_length_null_cases() // each case is an (input, len, expected) tuple; inputs contain nulls
+            .into_iter()
+            .for_each(|(input, len, expected)| {
+                let array = StringViewArray::from(input); // a view array, so the Utf8View arm (not Utf8) is exercised
+                let result = bit_length(&array).unwrap();
+                assert_eq!(len, result.len());
+                let result = result.as_any().downcast_ref::<Int32Array>().unwrap(); // the Utf8View arm builds an Int32Array
+
+                let expected: Int32Array = expected.into(); // array equality also compares the null mask
+                assert_eq!(&expected, result);
+            })
+    }
     #[test]
     fn bit_length_binary() {
         let value: Vec<&[u8]> = vec![b"one", &[0xff, 0xf8], b"three"];

Reply via email to