Dandandan commented on code in PR #9746: URL: https://github.com/apache/arrow-rs/pull/9746#discussion_r3099285744
##########
parquet/src/encodings/rle.rs:
##########
@@ -514,16 +514,28 @@ impl RleDecoder {
break;
}
{
+ #[cold]
+ #[inline(never)]
+ fn oob(max_idx: u32, dict_len: usize) -> ! {
+ panic!(
+ "dictionary index out of bounds: the len is
{dict_len} but the index is {max_idx}"
+ )
+ }
+ const CHUNK: usize = 16;
let out = &mut buffer[values_read..values_read +
num_values];
let idx = &index_buf[..num_values];
- let mut out_chunks = out.chunks_exact_mut(8);
- let idx_chunks = idx.chunks_exact(8);
+ let dict_len = dict.len();
+ let mut out_chunks = out.chunks_exact_mut(CHUNK);
+ let idx_chunks = idx.chunks_exact(CHUNK);
for (out_chunk, idx_chunk) in
out_chunks.by_ref().zip(idx_chunks) {
- let dict_len = dict.len();
- assert!(
- idx_chunk.iter().all(|&i| (i as usize) <
dict_len),
- "dictionary index out of bounds"
- );
+ // u32 max-reduction instead of `.all(|&i| ..)`:
`.all`
Review Comment:
Oof :)
I have now also compared it to `.fold(true, |a, &i| a & ((i as u32) <
dict_len_u32))`. It looks like computing the max and then comparing once
(max-reduce) also generates better code (and benchmarks better) than comparing
every index against the length and AND-reducing the results (AND-reduce):
```
max-reduce:
ldp q1, q0, [x13, #32]
ldp q3, q2, [x13]
umax.4s v4, v2, v0
umax.4s v3, v3, v1
umax.4s v3, v3, v4
umaxv.4s s3, v3
fmov w8, s3
cmp x21, x8
b.ls LBB2735_48
AND-reduce:
ldp q1, q0, [x8, #32]
ldp q3, q2, [x8]
cmhs.4s v4, v2, v6
cmhs.4s v3, v3, v6
uzp1.8h v3, v3, v4
cmhs.4s v4, v1, v6
cmhs.4s v5, v0, v6
uzp1.8h v4, v4, v5
uzp1.16b v3, v3, v4
umaxv.16b b3, v3
fmov w14, s3
tbnz w14, #0, LBB2735_48
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
