jhorstmann commented on code in PR #9746:
URL: https://github.com/apache/arrow-rs/pull/9746#discussion_r3098956779


##########
parquet/src/encodings/rle.rs:
##########
@@ -514,16 +514,28 @@ impl RleDecoder {
                         break;
                     }
                     {
+                        #[cold]
+                        #[inline(never)]
+                        fn oob(max_idx: u32, dict_len: usize) -> ! {
+                            panic!(
+                                "dictionary index out of bounds: the len is {dict_len} but the index is {max_idx}"
+                            )
+                        }
+                        const CHUNK: usize = 16;
                         let out = &mut buffer[values_read..values_read + num_values];
                         let idx = &index_buf[..num_values];
-                        let mut out_chunks = out.chunks_exact_mut(8);
-                        let idx_chunks = idx.chunks_exact(8);
+                        let dict_len = dict.len();
+                        let mut out_chunks = out.chunks_exact_mut(CHUNK);
+                        let idx_chunks = idx.chunks_exact(CHUNK);
                         for (out_chunk, idx_chunk) in out_chunks.by_ref().zip(idx_chunks) {
-                            let dict_len = dict.len();
-                            assert!(
-                                idx_chunk.iter().all(|&i| (i as usize) < dict_len),
-                                "dictionary index out of bounds"
-                            );
+                            // u32 max-reduction instead of `.all(|&i| ..)`: `.all`
+                            // short-circuits and blocks autovectorisation. Negative
+                            // i32 cast to u32 becomes a large value so the bounds
+                            // check still rejects it.
+                            let max_idx = idx_chunk.iter().fold(0u32, |acc, &i| acc.max(i as u32));
+                            if (max_idx as usize) >= dict_len {
+                                oob(max_idx, dict_len);

Review Comment:
   We should probably also return an error here instead of panicking, since the 
panic can be triggered by input provided by a user.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to