alamb opened a new issue, #4576:
URL: https://github.com/apache/arrow-rs/issues/4576

   **Describe the bug**
   When casting Dictionary --> StringArray the `cast` kernel panics with 
   
   ```
   thread 'main' panicked at 'assertion failed: idx < self.len', 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-buffer-44.0.0/src/buffer/boolean.rs:133:9
   ```
   
   
   **To Reproduce**
   Run this program:
   
   ```toml
   [package]
   name = "rust_arrow_playground"
   version = "0.1.0"
   edition = "2018"
   
   # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html
   
   [dependencies]
   arrow = {version = "44.0.0", default-features = false }
   ```
   
   ```rust
   use std::{sync::Arc};
   
   use arrow::{array::{StringArray, DictionaryArray, Int32Array, Array}, 
datatypes::{Int32Type, DataType}, buffer::{NullBuffer, BooleanBuffer}};
   
   
   
   
   fn main() {
       let expected = vec![Some("a"), Some("a"), None, Some("c")];
   
       // Note that values must contain NULL to trigger this
       let values = StringArray::from(vec![Some("a"), Some("b"), Some("c"), 
None]);
   
       // third element is out of bounds in the values, but is also nulls so it 
should not cause issues
       let keys = Int32Array::from(vec![0, 0, 1000, 2]);
       let keys = set_null(keys, 2);
   
       let array = DictionaryArray::<Int32Type>::try_new(keys, 
Arc::new(values)).unwrap();
   
       let expected_array: DictionaryArray::<Int32Type> = 
expected.clone().into_iter().collect();
       assert_eq!(&array, &expected_array);
   
       // expand out to strings
       let new_array = arrow::compute::cast(&array, &DataType::Utf8).unwrap();
       let expected_array: StringArray = expected.clone().into_iter().collect();
   
       assert_eq!(new_array.as_ref(), &expected_array as &dyn Array);
   
       println!("Done!")
   }
   
   
   /// Sets the specific index to null and everything else non-null
   fn set_null(keys: Int32Array, index: usize) -> Int32Array {
       let len = keys.len();
   
       let nulls: Vec<bool> = (0..len).map(|i| i != index).collect();
       let nulls = BooleanBuffer::from(nulls);
   
       let (_data_type, buffer, _old_nulls) = keys.into_parts();
       Int32Array::new(buffer, Some(NullBuffer::from(nulls)))
   
   }
   ```
   
   
   This results in 
   ```
       Finished dev [unoptimized + debuginfo] target(s) in 0.81s
        Running `/Users/alamb/Software/target-df/debug/rust_arrow_playground`
   thread 'main' panicked at 'assertion failed: idx < self.len', 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-buffer-44.0.0/src/buffer/boolean.rs:133:9
   stack backtrace:
      0: rust_begin_unwind
                at 
/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/std/src/panicking.rs:593:5
      1: core::panicking::panic_fmt
                at 
/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/panicking.rs:67:14
      2: core::panicking::panic
                at 
/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/panicking.rs:117:5
      3: arrow_buffer::buffer::boolean::BooleanBuffer::value
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-buffer-44.0.0/src/buffer/boolean.rs:133:9
      4: arrow_buffer::buffer::null::NullBuffer::is_valid
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-buffer-44.0.0/src/buffer/null.rs:145:9
      5: arrow_buffer::buffer::null::NullBuffer::is_null
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-buffer-44.0.0/src/buffer/null.rs:151:10
      6: arrow_array::array::Array::is_null::{{closure}}
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-array-44.0.0/src/array/mod.rs:193:30
      7: core::option::Option<T>::map
                at 
/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/option.rs:1075:29
      8: arrow_array::array::Array::is_null
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-array-44.0.0/src/array/mod.rs:193:9
      9: arrow_array::array::Array::is_valid
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-array-44.0.0/src/array/mod.rs:210:10
     10: arrow_select::take::take_bytes
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-select-44.0.0/src/take.rs:404:16
     11: arrow_select::take::take_impl
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-select-44.0.0/src/take.rs:126:25
     12: arrow_select::take::take
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-select-44.0.0/src/take.rs:81:5
     13: arrow_cast::cast::unpack_dictionary
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-cast-44.0.0/src/cast.rs:3095:13
     14: arrow_cast::cast::dictionary_cast
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-cast-44.0.0/src/cast.rs:3074:14
     15: arrow_cast::cast::cast_with_options
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-cast-44.0.0/src/cast.rs:779:22
     16: arrow_cast::cast::cast
                at 
/Users/alamb/.cargo/registry/src/index.crates.io-6f17d22bba15001f/arrow-cast-44.0.0/src/cast.rs:296:5
     17: rust_arrow_playground::main
                at ./src/main.rs:24:21
     18: core::ops::function::FnOnce::call_once
                at 
/rustc/8ede3aae28fe6e4d52b38157d7bfe0d3bceef225/library/core/src/ops/function.rs:250:5
   note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose 
backtrace.
   ```
   
   
   **Expected behavior**
   The program should not panic and should print `Done`
   
   **Additional context**
   We found this error using internal data
   
   @tustvold says he thinks the condition is backwards
   
   ```
   
   for (i, offset) in offsets.iter_mut().skip(1).enumerate() {
               let index = indices.value(i).to_usize().ok_or_else(|| {
                   ArrowError::ComputeError("Cast to usize failed".to_string())
               })?;
   
               if array.is_valid(index) && indices.is_valid(i) {
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to