This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4086409  ARROW-11577: [Rust] Fix Array transform on strings
4086409 is described below

commit 4086409e1b4cf4feac3b5c84060c69e6c7de898d
Author: Ben Chambers <[email protected]>
AuthorDate: Wed Feb 10 06:29:00 2021 -0500

    ARROW-11577: [Rust] Fix Array transform on strings
    
    Specifically, this fixes a bug found when applying `concat` to slices of
    input `StringArray`.
    
    Closes #9460 from bjchambers/ARROW-11577-concat-string-slices
    
    Authored-by: Ben Chambers <[email protected]>
    Signed-off-by: Andrew Lamb <[email protected]>
---
 rust/arrow/src/array/transform/variable_size.rs |  8 +++----
 rust/arrow/src/compute/kernels/concat.rs        | 31 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/rust/arrow/src/array/transform/variable_size.rs b/rust/arrow/src/array/transform/variable_size.rs
index 3ede26d..c9304db 100644
--- a/rust/arrow/src/array/transform/variable_size.rs
+++ b/rust/arrow/src/array/transform/variable_size.rs
@@ -41,7 +41,7 @@ fn extend_offset_values<T: OffsetSizeTrait>(
 
 pub(super) fn build_extend<T: OffsetSizeTrait>(array: &ArrayData) -> Extend {
     let offsets = array.buffer::<T>(0);
-    let values = &array.buffers()[1].as_slice()[array.offset()..];
+    let values = array.buffers()[1].as_slice();
     if array.null_count() == 0 {
         // fast case where we can copy regions without null issues
         Box::new(
@@ -78,12 +78,10 @@ pub(super) fn build_extend<T: OffsetSizeTrait>(array: &ArrayData) -> Extend {
                         // compute the new offset
                         let length = offsets[i + 1] - offsets[i];
                         last_offset += length;
-                        let length = length.to_usize().unwrap();
 
                         // append value
-                        let start = offsets[i].to_usize().unwrap()
-                            - offsets[0].to_usize().unwrap();
-                        let bytes = &values[start..(start + length)];
+                        let bytes = &values[offsets[i].to_usize().unwrap()
+                            ..offsets[i + 1].to_usize().unwrap()];
                         values_buffer.extend_from_slice(bytes);
                     }
                     // offsets are always present
diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs
index a51831c..5df0bd5 100644
--- a/rust/arrow/src/compute/kernels/concat.rs
+++ b/rust/arrow/src/compute/kernels/concat.rs
@@ -390,4 +390,35 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn test_string_array_slices() -> Result<()> {
+        let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
+        let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
+
+        let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
+
+        let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
+
+        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(actual_output, &expected_output);
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_string_array_with_null_slices() -> Result<()> {
+        let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
+        let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
+
+        let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
+
+        let expected_output =
+            StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
+
+        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
+        assert_eq!(actual_output, &expected_output);
+
+        Ok(())
+    }
 }

Reply via email to