fmonjalet commented on code in PR #8019:
URL: https://github.com/apache/arrow-rs/pull/8019#discussion_r2253625110


##########
arrow-string/src/length.rs:
##########
@@ -164,6 +195,30 @@ pub fn bit_length(array: &dyn Array) -> Result<ArrayRef, 
ArrowError> {
     }
 }
 
+fn run_end_length_impl<T: ArrowPrimitiveType>(
+    run_ends: &PrimitiveArray<T>,
+    value_lengths: &Int32Array,
+) -> Result<ArrayRef, ArrowError> {
+    let mut builder = Int32Builder::with_capacity(run_ends.len());
+    let mut prev_end = T::Native::default();
+
+    for i in 0..run_ends.len() {
+        let end = run_ends.value(i).to_usize().unwrap();
+        let start = prev_end.to_usize().unwrap();

Review Comment:
   For Int64 it looks like it may panic on 32bit architectures, should we use 
to_i64 instead?



##########
arrow-string/src/substring.rs:
##########
@@ -976,4 +1022,259 @@ mod tests {
         let expected = BinaryArray::from(vec![Some(expected_bytes)]);
         assert_eq!(expected, *actual);
     }
+    #[cfg(test)]
+    mod tests_ree {
+        use super::substring;
+        use arrow_array::{
+            array::{RunArray, StringArray},
+            types::{Int16Type, Int32Type, Int64Type, RunEndIndexType},
+            Array, Int32Array,
+        };
+        use arrow_buffer::{ArrowNativeType, ScalarBuffer};
+
+        #[test]
+        fn test_substring_ree_basic() {
+            // logical : [hello,hello,world,world]
+            let run_ends = Int32Array::from(vec![2, 4]);
+            let values = StringArray::from(vec![Some("hello"), Some("world")]);
+            let run_array = RunArray::<Int32Type>::try_new(&run_ends, 
&values).unwrap();
+
+            let expected_run_ends = Int32Array::from(vec![2, 4]);
+            let expected_values = StringArray::from(vec![Some("ell"), 
Some("orl")]);
+            let expected_run_array =
+                RunArray::<Int32Type>::try_new(&expected_run_ends, 
&expected_values).unwrap();
+
+            let result = substring(&run_array, 1, Some(3)).unwrap();
+            let result = result
+                .as_any()
+                .downcast_ref::<RunArray<Int32Type>>()
+                .unwrap();
+            assert_eq!(result.values(), expected_run_array.values());
+            assert_eq!(
+                result.run_ends().values(),
+                expected_run_array.run_ends().values()
+            );
+        }
+        #[test]
+        fn test_substring_ree_all_nulls() {
+            let run_ends = Int32Array::from(vec![1, 4]);
+            let values = StringArray::from(vec![Some(""), None]);
+            let run_array = RunArray::<Int32Type>::try_new(&run_ends, 
&values).unwrap();
+
+            let expected_run_ends = Int32Array::from(vec![1, 4]);
+            let expected_values = StringArray::from(vec![Some(""), None]);
+            let expected_run_array =
+                RunArray::<Int32Type>::try_new(&expected_run_ends, 
&expected_values).unwrap();
+
+            let result = substring(&run_array, 1, Some(3)).unwrap();
+            let result = result
+                .as_any()
+                .downcast_ref::<RunArray<Int32Type>>()
+                .unwrap();
+
+            assert_eq!(result.values(), expected_run_array.values());
+            assert_eq!(
+                result.run_ends().values(),
+                expected_run_array.run_ends().values()
+            );
+        }
+        // Review this before Pushing
+        #[test]
+        fn test_substring_ree_mixed_values() {
+            // logical: ["abc", NULL, NULL, "xyz", "xyz", "xyz"]
+            let run_ends = Int32Array::from(vec![1, 3, 6]);
+            let values = StringArray::from(vec![Some("abc"), None, 
Some("xyz")]);
+            let run_array = RunArray::<Int32Type>::try_new(&run_ends, 
&values).unwrap();
+
+            let expected_run_ends = Int32Array::from(vec![1, 3, 6]);
+            let expected_values = StringArray::from(vec![Some("bc"), None, 
Some("yz")]);
+            let expected_run_array =
+                RunArray::<Int32Type>::try_new(&expected_run_ends, 
&expected_values).unwrap();
+
+            let result = substring(&run_array, 1, Some(2)).unwrap();
+            let result = result
+                .as_any()
+                .downcast_ref::<RunArray<Int32Type>>()
+                .unwrap();
+
+            assert_eq!(result.values(), expected_run_array.values());
+            assert_eq!(
+                result.run_ends().values(),
+                expected_run_array.run_ends().values()
+            );
+        }
+        // Review this before Pushing
+        #[test]
+        fn test_substring_ree_empty_strings() {
+            let run_ends = Int32Array::from(vec![3]);
+            let values = StringArray::from(vec![Some("")]);
+            let run_array = RunArray::<Int32Type>::try_new(&run_ends, 
&values).unwrap();
+
+            let expected_run_ends = Int32Array::from(vec![3]);
+            let expected_values = StringArray::from(vec![Some("")]);
+            let expected_run_array =
+                RunArray::<Int32Type>::try_new(&expected_run_ends, 
&expected_values).unwrap();
+
+            let result = substring(&run_array, 0, Some(3)).unwrap();
+            let result = result
+                .as_any()
+                .downcast_ref::<RunArray<Int32Type>>()
+                .unwrap();
+
+            assert_eq!(result.values(), expected_run_array.values());
+            assert_eq!(
+                result.run_ends().values(),
+                expected_run_array.run_ends().values()
+            );
+        }
+        // Review before pushing PR

Review Comment:
   (Left over comment)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to