Jefffrey commented on code in PR #9373:
URL: https://github.com/apache/arrow-rs/pull/9373#discussion_r2801803143


##########
arrow-select/src/interleave.rs:
##########
@@ -200,12 +200,15 @@ fn interleave_dictionaries<K: ArrowDictionaryKeyType>(
     let dictionaries: Vec<_> = arrays.iter().map(|x| 
x.as_dictionary::<K>()).collect();
     let (should_merge, has_overflow) =
         should_merge_dictionary_values::<K>(&dictionaries, indices.len());
+
+    // If overflow detected, use generic fallback (not dictionary-specific)

Review Comment:
   Since we're also fixing `interleave`, could we add a test covering that path?



##########
arrow-select/src/concat.rs:
##########
@@ -1900,4 +1908,141 @@ mod tests {
         assert_eq!(values.values(), &[10, 20, 30]);
         assert_eq!(&[2, 3, 5], run_ends);
     }
+
+    #[test]
+    fn test_concat_u8_dictionary_256_values() {
+        // Integration test: concat should work with exactly 256 unique values
+        // Use FixedSizeBinary to ensure the test actually catches the bug
+        use arrow_array::FixedSizeBinaryArray;
+
+        // Dictionary 1: 128 unique FixedSizeBinary(1) values
+        let values1 = FixedSizeBinaryArray::try_from_iter((0..128_u8).map(|i| 
vec![i])).unwrap();
+        let keys1 = UInt8Array::from((0..128).map(|i| i as 
u8).collect::<Vec<_>>());
+        let dict1 = DictionaryArray::<UInt8Type>::try_new(keys1, 
Arc::new(values1)).unwrap();
+
+        // Dictionary 2: 128 unique FixedSizeBinary(1) values (128..255)
+        let values2 =
+            FixedSizeBinaryArray::try_from_iter((128..256_u16).map(|i| vec![i 
as u8])).unwrap();
+        let keys2 = UInt8Array::from((0..128).map(|i| i as 
u8).collect::<Vec<_>>());
+        let dict2 = DictionaryArray::<UInt8Type>::try_new(keys2, 
Arc::new(values2)).unwrap();
+
+        // Concatenate → 128 + 128 = 256 unique values total
+        let result = concat(&[&dict1 as &dyn Array, &dict2 as &dyn Array]);
+        assert!(
+            result.is_ok(),
+            "Concat should succeed with 256 unique values for u8"
+        );
+
+        let concatenated = result.unwrap();
+        assert_eq!(
+            concatenated.len(),
+            256,
+            "Should have 256 total elements (128 + 128)"
+        );
+    }
+
+    #[test]
+    fn test_concat_u16_dictionary_65536_values() {
+        // Integration test: concat should work with exactly 65,536 unique 
values for u16
+        // Use two different dictionaries to force the merge code path
+        // Note: This test creates large arrays, so it may be slow
+
+        // Dictionary 1: "a0" .. "a32767" (32,768 values)
+        let values1 = StringArray::from((0..32768).map(|i| format!("a{}", 
i)).collect::<Vec<_>>());
+        let keys1 = UInt16Array::from((0..32768).map(|i| i as 
u16).collect::<Vec<_>>());
+        let dict1 = DictionaryArray::<UInt16Type>::try_new(keys1, 
Arc::new(values1)).unwrap();
+
+        // Dictionary 2: "b0" .. "b32767" (32,768 values)
+        let values2 = StringArray::from((0..32768).map(|i| format!("b{}", 
i)).collect::<Vec<_>>());
+        let keys2 = UInt16Array::from((0..32768).map(|i| i as 
u16).collect::<Vec<_>>());
+        let dict2 = DictionaryArray::<UInt16Type>::try_new(keys2, 
Arc::new(values2)).unwrap();
+
+        // Concatenate → 32,768 + 32,768 = 65,536 unique values total
+        let result = concat(&[&dict1 as &dyn Array, &dict2 as &dyn Array]);
+        assert!(
+            result.is_ok(),
+            "Concat should succeed with 65,536 unique values for u16"
+        );
+
+        let concatenated = result.unwrap();
+        assert_eq!(
+            concatenated.len(),
+            65536,
+            "Should have 65,536 total elements (32,768 + 32,768)"
+        );
+    }
+
+    #[test]
+    fn test_concat_returns_error_not_panic() {

Review Comment:
   I feel this test is redundant, considering we already have a test below that 
checks for overflow (`test_concat_u8_dictionary_257_values_fails`).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to