This is an automated email from the ASF dual-hosted git repository. pkarwasz pushed a commit to branch fix/7z-header-loading in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit ae36f63aeeb1e98b00aef391114c2f5d8add6b6d Author: Piotr P. Karwasz <[email protected]> AuthorDate: Sat Oct 18 21:58:00 2025 +0200 7z: optimize header loading This change improves the efficiency of 7z header parsing: * Reads the **Signature Header** in a single ByteBuffer instead of multiple small reads, reducing overhead. * Uses a `MappedByteBuffer` to load the **Next Header** when the archive is backed by a `FileChannel`, improving performance for large headers by avoiding unnecessary copies. No new tests are added, as the existing test suite already exercises the affected header loading paths sufficiently. --- .../compress/archivers/sevenz/SevenZFile.java | 25 +++++++++++++--------- .../compress/archivers/sevenz/StartHeader.java | 7 +++++- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java index 803983793..f26960952 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -1247,10 +1247,8 @@ private boolean hasCurrentEntryBeenRead() { private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { Archive archive = new Archive(); ByteBuffer header = mapNextHeader(startHeader); - if (verifyCrc) { - if (startHeader.nextHeaderCrc != computeChecksum(header)) { - throw new ArchiveException("Corrupted 7z archive: CRC error in next header"); - } + if (verifyCrc && startHeader.nextHeaderCrc != computeChecksum(header)) { + throw new ArchiveException("Corrupted 7z archive: CRC error in next header"); } int nid = getUnsignedByte(header); if (nid == NID.kEncodedHeader) { @@ -1291,15 +1289,22 @@ private long[] longArray(final int size) throws MemoryLimitException { return new long[size]; } + /** + * Maps the next header into memory. + * + * @param startHeader the start header + * @return the mapped ByteBuffer + * @throws IOException if an I/O error occurs + */ private ByteBuffer mapNextHeader(final StartHeader startHeader) throws IOException { MemoryLimitException.checkKiB(bytesToKiB(startHeader.nextHeaderSize), Math.min(bytesToKiB(org.apache.commons.io.IOUtils.SOFT_MAX_ARRAY_LENGTH), maxMemoryLimitKiB)); + // startHeader is already within the channel's bounds if (channel instanceof FileChannel) { final FileChannel fileChannel = (FileChannel) channel; - return fileChannel.map(FileChannel.MapMode.READ_ONLY, startHeader.getNextHeaderPosition(), startHeader.nextHeaderSize) - .order(ByteOrder.LITTLE_ENDIAN); + return fileChannel.map(FileChannel.MapMode.READ_ONLY, startHeader.position(), startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN); } - channel.position(startHeader.getNextHeaderPosition()); + channel.position(startHeader.position()); final ByteBuffer buf = ByteBuffer.allocate(startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN); readFully(buf); return buf; @@ -1429,7 +1434,7 @@ private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive arch .get(); // @formatter:on } - final int unpackSize = toNonNegativeInt("unpackSize", folder.getUnpackSize()); + final int unpackSize = toNonNegativeInt("header", folder.getUnpackSize()); final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); if (nextHeader.length < unpackSize) { throw new ArchiveException("Premature end of stream"); @@ -1709,7 +1714,7 @@ private void readHeader(final ByteBuffer header, final Archive archive) throws I private Archive readHeaders(final byte[] password) throws IOException { final ByteBuffer startHeader = ByteBuffer.allocate(SIGNATURE_HEADER_SIZE).order(ByteOrder.LITTLE_ENDIAN); readFully(startHeader); - final byte[] signature = new byte[6]; + final byte[] signature = new byte[SIGNATURE.length]; startHeader.get(signature); if (!Arrays.equals(signature, SIGNATURE)) { throw new ArchiveException("Bad 7z signature"); @@ -1764,7 +1769,7 @@ private StartHeader readStartHeader(final ByteBuffer startHeader) throws IOExcep if (nextHeaderOffset > channel.size() - SIGNATURE_HEADER_SIZE) { throw new ArchiveException("nextHeaderOffset is out of bounds"); } - final int nextHeaderSize = toNonNegativeInt("startHeader.nextHeaderSize", readRealUint64(startHeader)); + final int nextHeaderSize = toNonNegativeInt("header", readRealUint64(startHeader)); if (nextHeaderSize > channel.size() - SIGNATURE_HEADER_SIZE - nextHeaderOffset) { throw new ArchiveException("nextHeaderSize is out of bounds"); } diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java index 733db5a71..c1049e85e 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java @@ -34,7 +34,12 @@ final class StartHeader { this.nextHeaderCrc = nextHeaderCrc; } - long getNextHeaderPosition() { + /** + * Gets the position of the next header in the file. + * + * @return the position of the next header + */ + long position() { return SevenZFile.SIGNATURE_HEADER_SIZE + nextHeaderOffset; } }
