alamb opened a new issue, #7124: URL: https://github.com/apache/arrow-rs/issues/7124
**Describe the bug** When certain invalid arrow-ipc data is read it results in a panic rather than an error https://github.com/apache/arrow-rs?tab=readme-ov-file#guidelines-for-panic-vs-result **To Reproduce** ```diff --- a/arrow-ipc/src/reader.rs +++ b/arrow-ipc/src/reader.rs @@ -1480,7 +1480,7 @@ impl<R: Read> RecordBatchReader for StreamReader<R> { #[cfg(test)] mod tests { - use crate::writer::{unslice_run_array, DictionaryTracker, IpcDataGenerator, IpcWriteOptions}; + use crate::writer::{unslice_run_array, DictionaryTracker, IpcDataGenerator, IpcWriteOptions, StreamWriter}; use super::*; @@ -2472,4 +2472,46 @@ mod tests { assert_eq!(decoded_batch.expect("Failed to read RecordBatch"), batch); }); } + + + #[test] + fn test_invalid_primitive_read() { + // Int32Array with not enough nulls + fn builder_with_invalid_data() -> ArrayDataBuilder { + let nulls = NullBuffer::from(&[true, false, true, false]); + + let buffer = Buffer::from(ScalarBuffer::<i32>::from_iter(0..8000)); + ArrayDataBuilder::new(DataType::Int32) + .len(8000) + .add_buffer(buffer.clone()) + .nulls(Some(nulls.clone())) // too few nulls, INVALID + } + + // build fails as expected + let expected_err = "Invalid argument error: null_bit_buffer size too small. got 1 needed 1000"; + let err = builder_with_invalid_data().build().unwrap_err(); + assert_eq!(err.to_string(), expected_err); + + // however, when we create an invalid array and try to read it back, we + // should get an error on read. + let data = builder_with_invalid_data(); + let array: ArrayRef = unsafe { + Arc::new(Int32Array::from(data.build_unchecked())) + }; + let batch = RecordBatch::try_from_iter( + vec![("a", array)] + ).unwrap(); + + let mut buf = vec![]; + let mut writer = StreamWriter::try_new(&mut buf, &batch.schema()).unwrap(); + writer.write(&batch).unwrap(); + writer.close().unwrap(); + + // reading the invalid data back should fail, but instead panics + let mut reader = StreamReader::try_new(std::io::Cursor::new(buf), None).unwrap(); + let err = reader.next() + .unwrap() // expect there to be a batch, but panics instead + .unwrap_err(); // expect it to be a validation error -- THIS DOES NOT FAIL + assert_eq!(err.to_string(), expected_err); // should be same error as above + } } ``` Panics rather than generating a correct error: ``` buffer not large enough (offset: 0, len: 8000, buffer_len: 1) thread 'reader::tests::test_invalid_primitive_read' panicked at arrow-buffer/src/buffer/boolean.rs:65:9: buffer not large enough (offset: 0, len: 8000, buffer_len: 1) stack backtrace: 0: rust_begin_unwind at /rustc/e71f9a9a98b0faf423844bf0ba7438f29dc27d58/library/std/src/panicking.rs:665:5 1: core::panicking::panic_fmt at /rustc/e71f9a9a98b0faf423844bf0ba7438f29dc27d58/library/core/src/panicking.rs:76:14 2: arrow_buffer::buffer::boolean::BooleanBuffer::new at /Users/andrewlamb/Software/arrow-rs/arrow-buffer/src/buffer/boolean.rs:65:9 3: arrow_data::data::ArrayDataBuilder::build::{{closure}} at /Users/andrewlamb/Software/arrow-rs/arrow-data/src/data.rs:1967:30 4: core::option::Option<T>::or_else at /Users/andrewlamb/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/option.rs:1546:21 5: arrow_data::data::ArrayDataBuilder::build at /Users/andrewlamb/Software/arrow-rs/arrow-data/src/data.rs:1964:21 6: arrow_ipc::reader::RecordBatchDecoder::create_primitive_array at ./src/reader.rs:289:26 7: arrow_ipc::reader::RecordBatchDecoder::create_array at ./src/reader.rs:252:17 8: arrow_ipc::reader::RecordBatchDecoder::read_record_batch at ./src/reader.rs:470:29 9: arrow_ipc::reader::StreamReader<R>::maybe_next at ./src/reader.rs:1411:17 10: <arrow_ipc::reader::StreamReader<R> as core::iter::traits::iterator::Iterator>::next at ./src/reader.rs:1471:9 11: arrow_ipc::reader::tests::test_invalid_primitive_read at ./src/reader.rs:2512:19 12: arrow_ipc::reader::tests::test_invalid_primitive_read::{{closure}} at ./src/reader.rs:2478:37 13: core::ops::function::FnOnce::call_once at /Users/andrewlamb/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5 14: core::ops::function::FnOnce::call_once at /rustc/e71f9a9a98b0faf423844bf0ba7438f29dc27d58/library/core/src/ops/function.rs:250:5 note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace. ``` **Expected behavior** I expect the test to pass (an error to be returned), rather than a panic **Additional context** I think this would be a good actual test to use as it will verify validation in FileWriter, StreamWriter, and FileDecoder ```rust #[test] fn test_validation_of_invalid_primitive_array() { // Int32Array with not enough nulls let array = unsafe { let nulls = NullBuffer::from(&[true, false, true, false]); let buffer = ScalarBuffer::<i32>::from_iter(0..8000); let data = ArrayDataBuilder::new(DataType::Int32) .len(8000) .add_buffer(buffer.into()) .nulls(Some(nulls)) // too few nulls .build_unchecked(); Int32Array::from(data) }; expect_ipc_validation_error( Arc::new(array), "Invalid argument error: Nulls do not match", ); } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
