This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 4086409 ARROW-11577: [Rust] Fix Array transform on strings
4086409 is described below
commit 4086409e1b4cf4feac3b5c84060c69e6c7de898d
Author: Ben Chambers <[email protected]>
AuthorDate: Wed Feb 10 06:29:00 2021 -0500
ARROW-11577: [Rust] Fix Array transform on strings
Specifically, this fixes a bug found when applying `concat` to slices of
input `StringArray`.
Closes #9460 from bjchambers/ARROW-11577-concat-string-slices
Authored-by: Ben Chambers <[email protected]>
Signed-off-by: Andrew Lamb <[email protected]>
---
rust/arrow/src/array/transform/variable_size.rs | 8 +++----
rust/arrow/src/compute/kernels/concat.rs | 31 +++++++++++++++++++++++++
2 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/rust/arrow/src/array/transform/variable_size.rs b/rust/arrow/src/array/transform/variable_size.rs
index 3ede26d..c9304db 100644
--- a/rust/arrow/src/array/transform/variable_size.rs
+++ b/rust/arrow/src/array/transform/variable_size.rs
@@ -41,7 +41,7 @@ fn extend_offset_values<T: OffsetSizeTrait>(
pub(super) fn build_extend<T: OffsetSizeTrait>(array: &ArrayData) -> Extend {
let offsets = array.buffer::<T>(0);
- let values = &array.buffers()[1].as_slice()[array.offset()..];
+ let values = array.buffers()[1].as_slice();
if array.null_count() == 0 {
// fast case where we can copy regions without null issues
Box::new(
@@ -78,12 +78,10 @@ pub(super) fn build_extend<T: OffsetSizeTrait>(array: &ArrayData) -> Extend {
// compute the new offset
let length = offsets[i + 1] - offsets[i];
last_offset += length;
- let length = length.to_usize().unwrap();
// append value
- let start = offsets[i].to_usize().unwrap()
- - offsets[0].to_usize().unwrap();
- let bytes = &values[start..(start + length)];
+ let bytes = &values[offsets[i].to_usize().unwrap()
+ ..offsets[i + 1].to_usize().unwrap()];
values_buffer.extend_from_slice(bytes);
}
// offsets are always present
diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs
index a51831c..5df0bd5 100644
--- a/rust/arrow/src/compute/kernels/concat.rs
+++ b/rust/arrow/src/compute/kernels/concat.rs
@@ -390,4 +390,35 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn test_string_array_slices() -> Result<()> {
+ let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
+ let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
+
+ let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
+
+ let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
+
+ let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
+ assert_eq!(actual_output, &expected_output);
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_string_array_with_null_slices() -> Result<()> {
+ let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
+ let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
+
+ let arr = concat(&[input_1.slice(1, 3).as_ref(), input_2.slice(1, 2).as_ref()])?;
+
+ let expected_output =
+ StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
+
+ let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
+ assert_eq!(actual_output, &expected_output);
+
+ Ok(())
+ }
}