alamb commented on code in PR #6779:
URL: https://github.com/apache/arrow-rs/pull/6779#discussion_r1890220694


##########
arrow-select/src/interleave.rs:
##########
@@ -461,4 +499,124 @@ mod tests {
             DictionaryArray::<Int32Type>::from_iter(vec![Some("0"), Some("1"), 
Some("2"), None]);
         assert_eq!(array.as_ref(), &expected)
     }
+
+    #[test]
+    fn test_interleave_views() {
+        let values = StringArray::from_iter_values([
+            "hello",
+            "world_long_string_not_inlined",
+            "foo",
+            "bar",
+            "baz",
+        ]);
+        let view_a = StringViewArray::from(&values);
+
+        let values = StringArray::from_iter_values([
+            "test",
+            "data",
+            "more_long_string_not_inlined",
+            "views",
+            "here",
+        ]);
+        let view_b = StringViewArray::from(&values);
+
+        let indices = &[
+            (0, 2), // "foo"
+            (1, 0), // "test"
+            (0, 4), // "baz"
+            (1, 3), // "views"
+            (0, 1), // "world_long_string_not_inlined"

Review Comment:
   since the test only picks one long string, only one buffer is going to be 
copied (the other strings are inlined)
   
   To provoke the issue described in the ticket, I think you need to interleave 
that same long string from the two different buffers. Something like
   
   ```
           let indices = &[
               (0, 1), // "world_long_string_not_inlined" (view_a)
               (1, 2), // "world_long_string_not_inlined" (view_b)
               (0, 1), // "world_long_string_not_inlined" (view_a)
               (1, 2), // "world_long_string_not_inlined" (view_b)
               (0, 1), // "world_long_string_not_inlined" (view_a)
               (1, 2), // "world_long_string_not_inlined" (view_b)
               (0, 1), // "world_long_string_not_inlined" (view_a)
               (1, 2), // "world_long_string_not_inlined" (view_b)
               (0, 1), // "world_long_string_not_inlined" (view_a)
               (1, 2), // "world_long_string_not_inlined" (view_b)
   ```
   
   And make sure the output view only has 2 buffers (the one from view_a and 
the one from view_b)



##########
arrow-select/src/interleave.rs:
##########
@@ -231,6 +235,40 @@ fn interleave_dictionaries<K: ArrowDictionaryKeyType>(
     Ok(Arc::new(array))
 }
 
+fn interleave_views<T: ByteViewType>(
+    values: &[&dyn Array],
+    indices: &[(usize, usize)],
+) -> Result<ArrayRef, ArrowError> {
+    let interleaved = Interleave::<'_, GenericByteViewArray<T>>::new(values, 
indices);
+    let mut views_builder = BufferBuilder::new(indices.len());
+    let mut buffers = Vec::new();
+
+    let mut buffer_lookup = HashMap::new();
+    for (array_idx, value_idx) in indices {
+        let array = interleaved.arrays[*array_idx];
+        let raw_view = array.views().get(*value_idx).unwrap();
+        let view = ByteView::from(*raw_view);

Review Comment:
   other code that manipulates vies checks the length before casting



##########
arrow-select/src/interleave.rs:
##########
@@ -461,4 +499,124 @@ mod tests {
             DictionaryArray::<Int32Type>::from_iter(vec![Some("0"), Some("1"), 
Some("2"), None]);
         assert_eq!(array.as_ref(), &expected)
     }
+
+    #[test]
+    fn test_interleave_views() {
+        let values = StringArray::from_iter_values([
+            "hello",
+            "world_long_string_not_inlined",
+            "foo",
+            "bar",
+            "baz",
+        ]);
+        let view_a = StringViewArray::from(&values);
+
+        let values = StringArray::from_iter_values([
+            "test",
+            "data",
+            "more_long_string_not_inlined",
+            "views",
+            "here",
+        ]);
+        let view_b = StringViewArray::from(&values);
+
+        let indices = &[
+            (0, 2), // "foo"
+            (1, 0), // "test"
+            (0, 4), // "baz"
+            (1, 3), // "views"
+            (0, 1), // "world_long_string_not_inlined"
+        ];
+
+        // Test specialized implementation
+        let values = interleave(&[&view_a, &view_b], indices).unwrap();
+        let result = values.as_string_view();
+        assert_eq!(result.data_buffers().len(), 1);
+
+        // Test fallback implementation

Review Comment:
   Since interleave_fallback isn't pub I don't think we should be testing it 
directly. Intead you should arrange the paramters to call it directly



##########
arrow-select/src/interleave.rs:
##########
@@ -231,6 +235,39 @@ fn interleave_dictionaries<K: ArrowDictionaryKeyType>(
     Ok(Arc::new(array))
 }
 
+fn interleave_views<T: ByteViewType>(
+    values: &[&dyn Array],
+    indices: &[(usize, usize)],
+) -> Result<ArrayRef, ArrowError> {
+    let interleaved = Interleave::<'_, GenericByteViewArray<T>>::new(values, 
indices);
+    let mut views_builder = BufferBuilder::new(indices.len());
+    let mut buffers = Vec::with_capacity(values[0].len());
+
+    let mut buffer_lookup = HashMap::new();

Review Comment:
   Can you also please add some comments about what the buffer lookup 
represents? I think it is 
   ```rust
   // (input array_index, input buffer_index) --> output array buffer_index
   ```



##########
arrow-select/src/interleave.rs:
##########
@@ -461,4 +499,124 @@ mod tests {
             DictionaryArray::<Int32Type>::from_iter(vec![Some("0"), Some("1"), 
Some("2"), None]);
         assert_eq!(array.as_ref(), &expected)
     }
+
+    #[test]
+    fn test_interleave_views() {
+        let values = StringArray::from_iter_values([
+            "hello",
+            "world_long_string_not_inlined",
+            "foo",
+            "bar",
+            "baz",
+        ]);
+        let view_a = StringViewArray::from(&values);
+
+        let values = StringArray::from_iter_values([
+            "test",
+            "data",
+            "more_long_string_not_inlined",
+            "views",
+            "here",
+        ]);
+        let view_b = StringViewArray::from(&values);
+
+        let indices = &[
+            (0, 2), // "foo"
+            (1, 0), // "test"
+            (0, 4), // "baz"
+            (1, 3), // "views"
+            (0, 1), // "world_long_string_not_inlined"
+        ];
+
+        // Test specialized implementation
+        let values = interleave(&[&view_a, &view_b], indices).unwrap();
+        let result = values.as_string_view();
+        assert_eq!(result.data_buffers().len(), 1);
+
+        // Test fallback implementation
+        let fallback = interleave_fallback(&[&view_a, &view_b], 
indices).unwrap();
+        let fallback_result = fallback.as_string_view();
+        // note that fallback_result has 2 buffers, but only one long enough 
string to warrant a buffer
+        assert_eq!(fallback_result.data_buffers().len(), 2);
+
+        // Convert to strings for easier assertion
+        let collected: Vec<_> = result.iter().map(|x| x.map(|s| 
s.to_string())).collect();
+
+        let fallback_collected: Vec<_> = fallback_result
+            .iter()
+            .map(|x| x.map(|s| s.to_string()))
+            .collect();
+
+        assert_eq!(&collected, &fallback_collected);
+
+        assert_eq!(
+            &collected,
+            &[
+                Some("foo".to_string()),
+                Some("test".to_string()),
+                Some("baz".to_string()),
+                Some("views".to_string()),
+                Some("world_long_string_not_inlined".to_string()),
+            ]
+        );
+    }
+
+    #[test]

Review Comment:
   I think another important test case is testing with a StringViewArray that 
has multiple buffers (otherwise the logic to handle mapping different buffers 
is not exercised)
   
   So the input array should have multiple buffers  as well



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to