This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 980ea0b36c Reduce per-byte overhead in VLQ integer decoding (#9584)
980ea0b36c is described below

commit 980ea0b36c79a9e996efd90ad5f24571f0f9c0e0
Author: Daniël Heres <[email protected]>
AuthorDate: Tue Mar 24 14:03:23 2026 +0100

    Reduce per-byte overhead in VLQ integer decoding (#9584)
    
    ## Which issue does this PR close?
    
    Closes #9580
    
    ## Rationale
    
    The current VLQ decoder calls `get_aligned` for each byte, which
    involves repeated offset calculations and bounds checks in the hot loop.
    
    ## What changes are included in this PR?
    
    Align to the byte boundary once, then iterate directly over the buffer
    slice, avoiding per-byte overhead from `get_aligned`.
    
    ## Are there any user-facing changes?
    
    No.
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
 parquet/src/util/bit_util.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index 3a26603fab..2625648258 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -659,9 +659,15 @@ impl BitReader {
     ///
     /// Returns `None` if there's not enough bytes in the stream. `Some` otherwise.
     pub fn get_vlq_int(&mut self) -> Option<i64> {
+        // Align to byte boundary once, then read bytes directly
+        self.byte_offset = self.get_byte_offset();
+        self.bit_offset = 0;
+
+        let buf = &self.buffer[self.byte_offset..];
         let mut shift = 0;
         let mut v: i64 = 0;
-        while let Some(byte) = self.get_aligned::<u8>(1) {
+
+        for (i, &byte) in buf.iter().enumerate() {
             v |= ((byte & 0x7F) as i64) << shift;
             shift += 7;
             assert!(
@@ -669,6 +675,7 @@ impl BitReader {
                 "Num of bytes exceed MAX_VLQ_BYTE_LEN ({MAX_VLQ_BYTE_LEN})"
             );
             if byte & 0x80 == 0 {
+                self.byte_offset += i + 1;
                 return Some(v);
             }
         }

Reply via email to