alamb opened a new issue, #7124:
URL: https://github.com/apache/arrow-rs/issues/7124

   **Describe the bug**
   When certain invalid arrow-ipc data is read it results in a panic rather 
than an error
   
   
https://github.com/apache/arrow-rs?tab=readme-ov-file#guidelines-for-panic-vs-result
   
   **To Reproduce**
   
   ```diff
   --- a/arrow-ipc/src/reader.rs
   +++ b/arrow-ipc/src/reader.rs
   @@ -1480,7 +1480,7 @@ impl<R: Read> RecordBatchReader for StreamReader<R> {
   
    #[cfg(test)]
    mod tests {
   -    use crate::writer::{unslice_run_array, DictionaryTracker, 
IpcDataGenerator, IpcWriteOptions};
   +    use crate::writer::{unslice_run_array, DictionaryTracker, 
IpcDataGenerator, IpcWriteOptions, StreamWriter};
   
        use super::*;
   
   @@ -2472,4 +2472,46 @@ mod tests {
                    assert_eq!(decoded_batch.expect("Failed to read 
RecordBatch"), batch);
                });
        }
   +
   +
   +    #[test]
   +    fn test_invalid_primitive_read() {
   +            // Int32Array with not enough nulls
   +        fn builder_with_invalid_data() -> ArrayDataBuilder {
   +                let nulls = NullBuffer::from(&[true, false, true, false]);
   +
   +                let buffer = 
Buffer::from(ScalarBuffer::<i32>::from_iter(0..8000));
   +                ArrayDataBuilder::new(DataType::Int32)
   +                    .len(8000)
   +                    .add_buffer(buffer.clone())
   +                    .nulls(Some(nulls.clone())) // too few nulls, INVALID
   +            }
   +
   +        // build fails as expected
   +        let expected_err = "Invalid argument error: null_bit_buffer size 
too small. got 1 needed 1000";
   +        let err = builder_with_invalid_data().build().unwrap_err();
   +        assert_eq!(err.to_string(), expected_err);
   +
   +        // however, when we create an invalid array and try to read it 
back, we
   +        // should get an error on read.
   +        let data =  builder_with_invalid_data();
   +        let array: ArrayRef = unsafe {
   +            Arc::new(Int32Array::from(data.build_unchecked()))
   +        };
   +        let batch = RecordBatch::try_from_iter(
   +            vec![("a", array)]
   +        ).unwrap();
   +
   +        let mut buf = vec![];
   +        let mut writer = StreamWriter::try_new(&mut buf, 
&batch.schema()).unwrap();
   +        writer.write(&batch).unwrap();
   +        writer.close().unwrap();
   +
   +        // reading the invalid data back should fail, but instead panics
   +        let mut reader = StreamReader::try_new(std::io::Cursor::new(buf), 
None).unwrap();
   +        let err = reader.next()
   +                        .unwrap() // expect there to be a batch, but panics 
instead
   +            .unwrap_err(); // expect it to be a validation error -- THIS 
DOES NOT FAIL
   +        assert_eq!(err.to_string(), expected_err); // should be same error 
as above
   +    }
    }
   
   ```
   
   Panics rather than generating a correct error:
   
   ```
   buffer not large enough (offset: 0, len: 8000, buffer_len: 1)
   thread 'reader::tests::test_invalid_primitive_read' panicked at 
arrow-buffer/src/buffer/boolean.rs:65:9:
   buffer not large enough (offset: 0, len: 8000, buffer_len: 1)
   stack backtrace:
      0: rust_begin_unwind
                at 
/rustc/e71f9a9a98b0faf423844bf0ba7438f29dc27d58/library/std/src/panicking.rs:665:5
      1: core::panicking::panic_fmt
                at 
/rustc/e71f9a9a98b0faf423844bf0ba7438f29dc27d58/library/core/src/panicking.rs:76:14
      2: arrow_buffer::buffer::boolean::BooleanBuffer::new
                at 
/Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/boolean.rs:65:9
      3: arrow_data::data::ArrayDataBuilder::build::{{closure}}
                at 
/Users/andrewlamb/Software/arrow-rs/arrow-data/src/data.rs:1967:30
      4: core::option::Option<T>::or_else
                at 
/Users/andrewlamb/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/option.rs:1546:21
      5: arrow_data::data::ArrayDataBuilder::build
                at 
/Users/andrewlamb/Software/arrow-rs/arrow-data/src/data.rs:1964:21
      6: arrow_ipc::reader::RecordBatchDecoder::create_primitive_array
                at ./src/reader.rs:289:26
      7: arrow_ipc::reader::RecordBatchDecoder::create_array
                at ./src/reader.rs:252:17
      8: arrow_ipc::reader::RecordBatchDecoder::read_record_batch
                at ./src/reader.rs:470:29
      9: arrow_ipc::reader::StreamReader<R>::maybe_next
                at ./src/reader.rs:1411:17
     10: <arrow_ipc::reader::StreamReader<R> as 
core::iter::traits::iterator::Iterator>::next
                at ./src/reader.rs:1471:9
     11: arrow_ipc::reader::tests::test_invalid_primitive_read
                at ./src/reader.rs:2512:19
     12: arrow_ipc::reader::tests::test_invalid_primitive_read::{{closure}}
                at ./src/reader.rs:2478:37
     13: core::ops::function::FnOnce::call_once
                at 
/Users/andrewlamb/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5
     14: core::ops::function::FnOnce::call_once
                at 
/rustc/e71f9a9a98b0faf423844bf0ba7438f29dc27d58/library/core/src/ops/function.rs:250:5
   note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose 
backtrace.
   ```
   
   **Expected behavior**
   I expect the test to pass (an error to be returned), rather than a panic
   
   **Additional context**
   
   I think this would be a good actual test to use as it will verify validation 
in FileWriter, StreamWriter, and FileDecoder
   
   ```rust
       #[test]
       fn test_validation_of_invalid_primitive_array() {
           // Int32Array with not enough nulls
           let array = unsafe {
               let nulls = NullBuffer::from(&[true, false, true, false]);
   
               let buffer = ScalarBuffer::<i32>::from_iter(0..8000);
               let data = ArrayDataBuilder::new(DataType::Int32)
                   .len(8000)
                   .add_buffer(buffer.into())
                   .nulls(Some(nulls)) // too few nulls
                   .build_unchecked();
   
               Int32Array::from(data)
           };
   
           expect_ipc_validation_error(
               Arc::new(array),
               "Invalid argument error: Nulls do not match",
           );
       }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to