alamb commented on code in PR #6779:
URL: https://github.com/apache/arrow-rs/pull/6779#discussion_r1890220694
##########
arrow-select/src/interleave.rs:
##########
@@ -461,4 +499,124 @@ mod tests {
DictionaryArray::<Int32Type>::from_iter(vec![Some("0"), Some("1"),
Some("2"), None]);
assert_eq!(array.as_ref(), &expected)
}
+
+ #[test]
+ fn test_interleave_views() {
+ let values = StringArray::from_iter_values([
+ "hello",
+ "world_long_string_not_inlined",
+ "foo",
+ "bar",
+ "baz",
+ ]);
+ let view_a = StringViewArray::from(&values);
+
+ let values = StringArray::from_iter_values([
+ "test",
+ "data",
+ "more_long_string_not_inlined",
+ "views",
+ "here",
+ ]);
+ let view_b = StringViewArray::from(&values);
+
+ let indices = &[
+ (0, 2), // "foo"
+ (1, 0), // "test"
+ (0, 4), // "baz"
+ (1, 3), // "views"
+ (0, 1), // "world_long_string_not_inlined"
Review Comment:
since the test only picks one long string, only one buffer is going to be
copied (the other strings are inlined)
To provoke the issue described in the ticket, I think you need to interleave
that same long string from the two different buffers. Something like
```
let indices = &[
(0, 1), // "world_long_string_not_inlined" (view_a)
(1, 2), // "world_long_string_not_inlined" (view_b)
(0, 1), // "world_long_string_not_inlined" (view_a)
(1, 2), // "world_long_string_not_inlined" (view_b)
(0, 1), // "world_long_string_not_inlined" (view_a)
(1, 2), // "world_long_string_not_inlined" (view_b)
(0, 1), // "world_long_string_not_inlined" (view_a)
(1, 2), // "world_long_string_not_inlined" (view_b)
(0, 1), // "world_long_string_not_inlined" (view_a)
(1, 2), // "world_long_string_not_inlined" (view_b)
```
And make sure the output view only has 2 buffers (the one from view_a and
the one from view_b)
##########
arrow-select/src/interleave.rs:
##########
@@ -231,6 +235,40 @@ fn interleave_dictionaries<K: ArrowDictionaryKeyType>(
Ok(Arc::new(array))
}
+fn interleave_views<T: ByteViewType>(
+ values: &[&dyn Array],
+ indices: &[(usize, usize)],
+) -> Result<ArrayRef, ArrowError> {
+ let interleaved = Interleave::<'_, GenericByteViewArray<T>>::new(values,
indices);
+ let mut views_builder = BufferBuilder::new(indices.len());
+ let mut buffers = Vec::new();
+
+ let mut buffer_lookup = HashMap::new();
+ for (array_idx, value_idx) in indices {
+ let array = interleaved.arrays[*array_idx];
+ let raw_view = array.views().get(*value_idx).unwrap();
+ let view = ByteView::from(*raw_view);
Review Comment:
other code that manipulates vies checks the length before casting
##########
arrow-select/src/interleave.rs:
##########
@@ -461,4 +499,124 @@ mod tests {
DictionaryArray::<Int32Type>::from_iter(vec![Some("0"), Some("1"),
Some("2"), None]);
assert_eq!(array.as_ref(), &expected)
}
+
+ #[test]
+ fn test_interleave_views() {
+ let values = StringArray::from_iter_values([
+ "hello",
+ "world_long_string_not_inlined",
+ "foo",
+ "bar",
+ "baz",
+ ]);
+ let view_a = StringViewArray::from(&values);
+
+ let values = StringArray::from_iter_values([
+ "test",
+ "data",
+ "more_long_string_not_inlined",
+ "views",
+ "here",
+ ]);
+ let view_b = StringViewArray::from(&values);
+
+ let indices = &[
+ (0, 2), // "foo"
+ (1, 0), // "test"
+ (0, 4), // "baz"
+ (1, 3), // "views"
+ (0, 1), // "world_long_string_not_inlined"
+ ];
+
+ // Test specialized implementation
+ let values = interleave(&[&view_a, &view_b], indices).unwrap();
+ let result = values.as_string_view();
+ assert_eq!(result.data_buffers().len(), 1);
+
+ // Test fallback implementation
Review Comment:
Since interleave_fallback isn't pub I don't think we should be testing it
directly. Intead you should arrange the paramters to call it directly
##########
arrow-select/src/interleave.rs:
##########
@@ -231,6 +235,39 @@ fn interleave_dictionaries<K: ArrowDictionaryKeyType>(
Ok(Arc::new(array))
}
+fn interleave_views<T: ByteViewType>(
+ values: &[&dyn Array],
+ indices: &[(usize, usize)],
+) -> Result<ArrayRef, ArrowError> {
+ let interleaved = Interleave::<'_, GenericByteViewArray<T>>::new(values,
indices);
+ let mut views_builder = BufferBuilder::new(indices.len());
+ let mut buffers = Vec::with_capacity(values[0].len());
+
+ let mut buffer_lookup = HashMap::new();
Review Comment:
Can you also please add some comments about what the buffer lookup
represents? I think it is
```rust
// (input array_index, input buffer_index) --> output array buffer_index
```
##########
arrow-select/src/interleave.rs:
##########
@@ -461,4 +499,124 @@ mod tests {
DictionaryArray::<Int32Type>::from_iter(vec![Some("0"), Some("1"),
Some("2"), None]);
assert_eq!(array.as_ref(), &expected)
}
+
+ #[test]
+ fn test_interleave_views() {
+ let values = StringArray::from_iter_values([
+ "hello",
+ "world_long_string_not_inlined",
+ "foo",
+ "bar",
+ "baz",
+ ]);
+ let view_a = StringViewArray::from(&values);
+
+ let values = StringArray::from_iter_values([
+ "test",
+ "data",
+ "more_long_string_not_inlined",
+ "views",
+ "here",
+ ]);
+ let view_b = StringViewArray::from(&values);
+
+ let indices = &[
+ (0, 2), // "foo"
+ (1, 0), // "test"
+ (0, 4), // "baz"
+ (1, 3), // "views"
+ (0, 1), // "world_long_string_not_inlined"
+ ];
+
+ // Test specialized implementation
+ let values = interleave(&[&view_a, &view_b], indices).unwrap();
+ let result = values.as_string_view();
+ assert_eq!(result.data_buffers().len(), 1);
+
+ // Test fallback implementation
+ let fallback = interleave_fallback(&[&view_a, &view_b],
indices).unwrap();
+ let fallback_result = fallback.as_string_view();
+ // note that fallback_result has 2 buffers, but only one long enough
string to warrant a buffer
+ assert_eq!(fallback_result.data_buffers().len(), 2);
+
+ // Convert to strings for easier assertion
+ let collected: Vec<_> = result.iter().map(|x| x.map(|s|
s.to_string())).collect();
+
+ let fallback_collected: Vec<_> = fallback_result
+ .iter()
+ .map(|x| x.map(|s| s.to_string()))
+ .collect();
+
+ assert_eq!(&collected, &fallback_collected);
+
+ assert_eq!(
+ &collected,
+ &[
+ Some("foo".to_string()),
+ Some("test".to_string()),
+ Some("baz".to_string()),
+ Some("views".to_string()),
+ Some("world_long_string_not_inlined".to_string()),
+ ]
+ );
+ }
+
+ #[test]
Review Comment:
I think another important test case is testing with a StringViewArray that
has multiple buffers (otherwise the logic to handle mapping different buffers
is not exercised)
So the input array should have multiple buffers as well
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]