This is an automated email from the ASF dual-hosted git repository.

pkarwasz pushed a commit to branch fix/7z-header-loading
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit ae36f63aeeb1e98b00aef391114c2f5d8add6b6d
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Sat Oct 18 21:58:00 2025 +0200

    7z: optimize header loading
    
    This change improves the efficiency of 7z header parsing:
    
    * Reads the **Signature Header** in a single ByteBuffer instead of multiple
      small reads, reducing overhead.
    * Uses a `MappedByteBuffer` to load the **Next Header** when the archive is
      backed by a `FileChannel`, improving performance for large headers by
      avoiding unnecessary copies.
    
    No new tests are added, as the existing test suite already exercises the
    affected header loading paths sufficiently.
---
 .../compress/archivers/sevenz/SevenZFile.java      | 25 +++++++++++++---------
 .../compress/archivers/sevenz/StartHeader.java     |  7 +++++-
 2 files changed, 21 insertions(+), 11 deletions(-)

diff --git 
a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java 
b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
index 803983793..f26960952 100644
--- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
@@ -1247,10 +1247,8 @@ private boolean hasCurrentEntryBeenRead() {
     private Archive initializeArchive(final StartHeader startHeader, final 
byte[] password, final boolean verifyCrc) throws IOException {
         Archive archive = new Archive();
         ByteBuffer header = mapNextHeader(startHeader);
-        if (verifyCrc) {
-            if (startHeader.nextHeaderCrc != computeChecksum(header)) {
-                throw new ArchiveException("Corrupted 7z archive: CRC error in 
next header");
-            }
+        if (verifyCrc && startHeader.nextHeaderCrc != computeChecksum(header)) 
{
+            throw new ArchiveException("Corrupted 7z archive: CRC error in 
next header");
         }
         int nid = getUnsignedByte(header);
         if (nid == NID.kEncodedHeader) {
@@ -1291,15 +1289,22 @@ private long[] longArray(final int size) throws 
MemoryLimitException {
         return new long[size];
     }
 
+    /**
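+     * Loads the next header into memory, memory-mapping it when the channel is a {@link FileChannel} and reading it into a heap buffer otherwise.
+     *
+     * @param startHeader the start header
+     * @return a little-endian ByteBuffer containing the next header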
+     * @throws IOException if an I/O error occurs
+     */
     private ByteBuffer mapNextHeader(final StartHeader startHeader) throws 
IOException {
         MemoryLimitException.checkKiB(bytesToKiB(startHeader.nextHeaderSize), 
Math.min(bytesToKiB(org.apache.commons.io.IOUtils.SOFT_MAX_ARRAY_LENGTH),
                 maxMemoryLimitKiB));
+        // startHeader is already within the channel's bounds
         if (channel instanceof FileChannel) {
             final FileChannel fileChannel = (FileChannel) channel;
-            return fileChannel.map(FileChannel.MapMode.READ_ONLY, 
startHeader.getNextHeaderPosition(), startHeader.nextHeaderSize)
-                    .order(ByteOrder.LITTLE_ENDIAN);
+            return fileChannel.map(FileChannel.MapMode.READ_ONLY, 
startHeader.position(), 
startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN);
         }
-        channel.position(startHeader.getNextHeaderPosition());
+        channel.position(startHeader.position());
         final ByteBuffer buf = 
ByteBuffer.allocate(startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN);
         readFully(buf);
         return buf;
@@ -1429,7 +1434,7 @@ private ByteBuffer readEncodedHeader(final ByteBuffer 
header, final Archive arch
                     .get();
             // @formatter:on
         }
-        final int unpackSize = toNonNegativeInt("unpackSize", 
folder.getUnpackSize());
+        final int unpackSize = toNonNegativeInt("header", 
folder.getUnpackSize());
         final byte[] nextHeader = IOUtils.readRange(inputStreamStack, 
unpackSize);
         if (nextHeader.length < unpackSize) {
             throw new ArchiveException("Premature end of stream");
@@ -1709,7 +1714,7 @@ private void readHeader(final ByteBuffer header, final 
Archive archive) throws I
     private Archive readHeaders(final byte[] password) throws IOException {
         final ByteBuffer startHeader = 
ByteBuffer.allocate(SIGNATURE_HEADER_SIZE).order(ByteOrder.LITTLE_ENDIAN);
         readFully(startHeader);
-        final byte[] signature = new byte[6];
+        final byte[] signature = new byte[SIGNATURE.length];
         startHeader.get(signature);
         if (!Arrays.equals(signature, SIGNATURE)) {
             throw new ArchiveException("Bad 7z signature");
@@ -1764,7 +1769,7 @@ private StartHeader readStartHeader(final ByteBuffer 
startHeader) throws IOExcep
         if (nextHeaderOffset > channel.size() - SIGNATURE_HEADER_SIZE) {
             throw new ArchiveException("nextHeaderOffset is out of bounds");
         }
-        final int nextHeaderSize = 
toNonNegativeInt("startHeader.nextHeaderSize", readRealUint64(startHeader));
+        final int nextHeaderSize = toNonNegativeInt("header", 
readRealUint64(startHeader));
         if (nextHeaderSize > channel.size() - SIGNATURE_HEADER_SIZE - 
nextHeaderOffset) {
             throw new ArchiveException("nextHeaderSize is out of bounds");
         }
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java 
b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
index 733db5a71..c1049e85e 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
@@ -34,7 +34,12 @@ final class StartHeader {
         this.nextHeaderCrc = nextHeaderCrc;
     }
 
-    long getNextHeaderPosition() {
+    /**
+     * Gets the position of the next header in the file.
+     *
+     * @return the position of the next header
+     */
+    long position() {
         return SevenZFile.SIGNATURE_HEADER_SIZE + nextHeaderOffset;
     }
 }

Reply via email to