This is an automated email from the ASF dual-hosted git repository.

pkarwasz pushed a commit to branch fix/7z-header-loading
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit 8edc1fc7787f79d56e2c2048fcfc712de97d8332
Author: Piotr P. Karwasz <[email protected]>
AuthorDate: Wed Oct 15 16:07:00 2025 +0200

    7z: improve header loading
---
 .../compress/archivers/sevenz/SevenZFile.java      | 151 ++++++++-------------
 .../compress/archivers/sevenz/StartHeader.java     |   4 +
 .../compress/archivers/sevenz/SevenZFileTest.java  |  18 +--
 3 files changed, 67 insertions(+), 106 deletions(-)

diff --git 
a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java 
b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
index cdd6e1977..803983793 100644
--- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
@@ -20,7 +20,6 @@
 
 import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
 import java.io.EOFException;
 import java.io.File;
 import java.io.FilterInputStream;
@@ -28,7 +27,7 @@
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
-import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
 import java.nio.channels.SeekableByteChannel;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -39,7 +38,6 @@
 import java.util.Map;
 import java.util.Objects;
 import java.util.zip.CRC32;
-import java.util.zip.CheckedInputStream;
 
 import org.apache.commons.compress.MemoryLimitException;
 import org.apache.commons.compress.archivers.AbstractArchiveBuilder;
@@ -437,6 +435,14 @@ private static ByteBuffer ensureRemaining(final ByteBuffer 
header, final long ex
         return header;
     }
 
+    private static long computeChecksum(final ByteBuffer header) {
+        final int currentPosition = header.position();
+        final CRC32 crc = new CRC32();
+        crc.update(header);
+        header.position(currentPosition);
+        return crc.getValue();
+    }
+
     /**
      * Wrapper of {@link ByteBuffer#get(byte[])} that checks remaining bytes 
first.
      */
@@ -492,14 +498,14 @@ static int readFieldSize(final ByteBuffer header) throws 
ArchiveException {
     }
 
     /**
-     * Reads a 7z REAL_UINT64 from the stream.
+     * Reads a 7z REAL_UINT64 from the header.
      *
-     * @param inputStream the input stream containing the 7z header.
+     * @param header the buffer containing the 7z header.
      * @return a non-negative long.
      * @throws ArchiveException if the value is truncated or too large.
      */
-    static long readRealUint64(final DataInputStream inputStream) throws 
IOException {
-        final long value = Long.reverseBytes(inputStream.readLong());
+    static long readRealUint64(final ByteBuffer header) throws IOException {
+        final long value = header.getLong();
         if (value < 0) {
             throw new ArchiveException("7z archive: Unsupported, cannot handle 
integer larger then %d, but was %s", Integer.MAX_VALUE,
                     Long.toUnsignedString(value));
@@ -518,18 +524,6 @@ static long readUint32(final ByteBuffer header) throws 
ArchiveException {
         return Integer.toUnsignedLong(getInt(header));
     }
 
-
-    /**
-     * Reads a 7z UINT32 from the stream.
-     *
-     * @param inputStream the input stream containing the 7z header.
-     * @return a non-negative long.
-     * @throws ArchiveException if the value is truncated.
-     */
-    static long readUint32(final DataInputStream inputStream) throws 
IOException {
-        return 
Integer.toUnsignedLong(Integer.reverseBytes(inputStream.readInt()));
-    }
-
     /**
      * Reads a 7z UINT64 from the header.
      *
@@ -1251,34 +1245,24 @@ private boolean hasCurrentEntryBeenRead() {
     }
 
     private Archive initializeArchive(final StartHeader startHeader, final 
byte[] password, final boolean verifyCrc) throws IOException {
-        MemoryLimitException.checkKiB(bytesToKiB(startHeader.nextHeaderSize), 
Math.min(bytesToKiB(org.apache.commons.io.IOUtils.SOFT_MAX_ARRAY_LENGTH),
-                maxMemoryLimitKiB));
-        channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset);
+        Archive archive = new Archive();
+        ByteBuffer header = mapNextHeader(startHeader);
         if (verifyCrc) {
-            final long position = channel.position();
-            final CheckedInputStream cis = new 
CheckedInputStream(Channels.newInputStream(channel), new CRC32());
-            if (cis.skip(startHeader.nextHeaderSize) != 
startHeader.nextHeaderSize) {
-                throw new ArchiveException("Problem computing NextHeader 
CRC-32");
+            if (startHeader.nextHeaderCrc != computeChecksum(header)) {
+                throw new ArchiveException("Corrupted 7z archive: CRC error in 
next header");
             }
-            if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) {
-                throw new ArchiveException("NextHeader CRC-32 mismatch");
-}
-            channel.position(position);
         }
-        Archive archive = new Archive();
-        ByteBuffer buf = 
ByteBuffer.allocate(startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN);
-        readFully(buf);
-        int nid = getUnsignedByte(buf);
+        int nid = getUnsignedByte(header);
         if (nid == NID.kEncodedHeader) {
-            buf = readEncodedHeader(buf, archive, password);
+            header = readEncodedHeader(header, archive, password);
             // Archive gets rebuilt with the new header
             archive = new Archive();
-            nid = getUnsignedByte(buf);
+            nid = getUnsignedByte(header);
         }
         if (nid != NID.kHeader) {
             throw new ArchiveException("7z archive: Broken or unsupported, no 
Header");
         }
-        readHeader(buf, archive);
+        readHeader(header, archive);
         archive.subStreamsInfo = null;
         return archive;
     }
@@ -1307,6 +1291,20 @@ private long[] longArray(final int size) throws 
MemoryLimitException {
         return new long[size];
     }
 
+    private ByteBuffer mapNextHeader(final StartHeader startHeader) throws 
IOException {
+        MemoryLimitException.checkKiB(bytesToKiB(startHeader.nextHeaderSize), 
Math.min(bytesToKiB(org.apache.commons.io.IOUtils.SOFT_MAX_ARRAY_LENGTH),
+                maxMemoryLimitKiB));
+        if (channel instanceof FileChannel) {
+            final FileChannel fileChannel = (FileChannel) channel;
+            return fileChannel.map(FileChannel.MapMode.READ_ONLY, 
startHeader.getNextHeaderPosition(), startHeader.nextHeaderSize)
+                    .order(ByteOrder.LITTLE_ENDIAN);
+        }
+        channel.position(startHeader.getNextHeaderPosition());
+        final ByteBuffer buf = 
ByteBuffer.allocate(startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN);
+        readFully(buf);
+        return buf;
+    }
+
     /**
      * Reads a byte of data.
      *
@@ -1411,7 +1409,7 @@ private ByteBuffer readEncodedHeader(final ByteBuffer 
header, final Archive arch
         // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some 
stage?
         final Folder folder = archive.folders[0];
         final int firstPackStreamIndex = 0;
-        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 0;
+        final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos;
         channel.position(folderOffset);
         InputStream inputStreamStack = new 
BoundedSeekableByteChannelInputStream(channel, 
archive.packSizes[firstPackStreamIndex]);
         for (final Coder coder : folder.getOrderedCoders()) {
@@ -1680,7 +1678,7 @@ Folder readFolder(final ByteBuffer header) throws 
IOException {
 
     private void readFully(final ByteBuffer buf) throws IOException {
         buf.rewind();
-        IOUtils.readFully(channel, buf);
+        org.apache.commons.io.IOUtils.read(channel, buf);
         buf.flip();
     }
 
@@ -1709,40 +1707,24 @@ private void readHeader(final ByteBuffer header, final 
Archive archive) throws I
     }
 
     private Archive readHeaders(final byte[] password) throws IOException {
-        final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes 
version + 4 bytes CRC */).order(ByteOrder.LITTLE_ENDIAN);
-        readFully(buf);
+        final ByteBuffer startHeader = 
ByteBuffer.allocate(SIGNATURE_HEADER_SIZE).order(ByteOrder.LITTLE_ENDIAN);
+        readFully(startHeader);
         final byte[] signature = new byte[6];
-        buf.get(signature);
+        startHeader.get(signature);
         if (!Arrays.equals(signature, SIGNATURE)) {
             throw new ArchiveException("Bad 7z signature");
         }
         // 7zFormat.txt has it wrong - it's first major then minor
-        final byte archiveVersionMajor = buf.get();
-        final byte archiveVersionMinor = buf.get();
+        final byte archiveVersionMajor = startHeader.get();
+        final byte archiveVersionMinor = startHeader.get();
         if (archiveVersionMajor != 0) {
             throw new ArchiveException("7z archive: Unsupported 7z version 
(%d,%d)", archiveVersionMajor, archiveVersionMinor);
         }
-        boolean headerLooksValid = false; // See 
https://www.7-zip.org/recover.html - "There is no correct End Header at the end 
of archive"
-        final long startHeaderCrc = readUint32(buf);
-        if (startHeaderCrc == 0) {
-            // This is an indication of a corrupt header - peek the next 20 
bytes
-            final long currentPosition = channel.position();
-            final ByteBuffer peekBuf = ByteBuffer.allocate(20);
-            readFully(peekBuf);
-            channel.position(currentPosition);
-            // Header invalid if all data is 0
-            while (peekBuf.hasRemaining()) {
-                if (peekBuf.get() != 0) {
-                    headerLooksValid = true;
-                    break;
-                }
-            }
-        } else {
-            headerLooksValid = true;
-        }
-        if (headerLooksValid) {
-            return initializeArchive(readStartHeader(startHeaderCrc), 
password, true);
+        final long startHeaderCrc = readUint32(startHeader);
+        if (startHeaderCrc == computeChecksum(startHeader)) {
+            return initializeArchive(readStartHeader(startHeader), password, 
true);
         }
+        // See https://www.7-zip.org/recover.html - "There is no correct End 
Header at the end of archive"
         // No valid header found - probably first file of multipart archive 
was removed too early. Scan for end header.
         if (tryToRecoverBrokenArchives) {
             return tryToLocateEndHeader(password);
@@ -1777,27 +1759,17 @@ private void readPackInfo(final ByteBuffer header, 
final Archive archive) throws
         }
     }
 
-    private StartHeader readStartHeader(final long startHeaderCrc) throws 
IOException {
-        // using Stream rather than ByteBuffer for the benefit of the built-in 
CRC check
-        try (DataInputStream dataInputStream = new 
DataInputStream(ChecksumInputStream.builder()
-                // @formatter:off
-                .setChecksum(new CRC32())
-                .setInputStream(new 
BoundedSeekableByteChannelInputStream(channel, 20))
-                .setCountThreshold(20L)
-                .setExpectedChecksumValue(startHeaderCrc)
-                .get())) {
-                // @formatter:on
-            final long nextHeaderOffset = readRealUint64(dataInputStream);
-            if (nextHeaderOffset > channel.size() - SIGNATURE_HEADER_SIZE) {
-                throw new ArchiveException("nextHeaderOffset is out of 
bounds");
-            }
-            final int nextHeaderSize = toNonNegativeInt("nextHeaderSize", 
readRealUint64(dataInputStream));
-            if (nextHeaderSize > channel.size() - SIGNATURE_HEADER_SIZE - 
nextHeaderOffset) {
-                throw new ArchiveException("nextHeaderSize is out of bounds");
-            }
-            final long nextHeaderCrc = readUint32(dataInputStream);
-            return new StartHeader(nextHeaderOffset, nextHeaderSize, 
nextHeaderCrc);
+    private StartHeader readStartHeader(final ByteBuffer startHeader) throws 
IOException {
+        final long nextHeaderOffset = readRealUint64(startHeader);
+        if (nextHeaderOffset > channel.size() - SIGNATURE_HEADER_SIZE) {
+            throw new ArchiveException("nextHeaderOffset is out of bounds");
+        }
+        final int nextHeaderSize = 
toNonNegativeInt("startHeader.nextHeaderSize", readRealUint64(startHeader));
+        if (nextHeaderSize > channel.size() - SIGNATURE_HEADER_SIZE - 
nextHeaderOffset) {
+            throw new ArchiveException("nextHeaderSize is out of bounds");
         }
+        final long nextHeaderCrc = readUint32(startHeader);
+        return new StartHeader(nextHeaderOffset, nextHeaderSize, 
nextHeaderCrc);
     }
 
     private void readStreamsInfo(final ByteBuffer header, final Archive 
archive) throws IOException {
@@ -2383,15 +2355,8 @@ public String toString() {
     private Archive tryToLocateEndHeader(final byte[] password) throws 
IOException {
         final ByteBuffer nidBuf = ByteBuffer.allocate(1);
         final long searchLimit = 1024L * 1024 * 1;
-        // Main header, plus bytes that readStartHeader would read
-        final long previousDataSize = channel.position() + 20;
-        final long minPos;
         // Determine minimal position - can't start before current position
-        if (channel.position() + searchLimit > channel.size()) {
-            minPos = channel.position();
-        } else {
-            minPos = channel.size() - searchLimit;
-        }
+        final long minPos = Math.max(channel.position(), channel.size() - 
searchLimit);
         long pos = channel.size() - 1;
         // Loop: Try from end of archive
         while (pos > minPos) {
@@ -2406,7 +2371,7 @@ private Archive tryToLocateEndHeader(final byte[] 
password) throws IOException {
             if (nid == NID.kEncodedHeader || nid == NID.kHeader) {
                 try {
                     // Try to initialize Archive structure from here
-                    final long nextHeaderOffset = pos - previousDataSize;
+                    final long nextHeaderOffset = pos - SIGNATURE_HEADER_SIZE;
                     // Smaller than 1 MiB, so fits in an int
                     final long nextHeaderSize = channel.size() - pos;
                     final StartHeader startHeader = new 
StartHeader(nextHeaderOffset, (int) nextHeaderSize, 0);
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java 
b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
index bf7212fe1..733db5a71 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java
@@ -33,4 +33,8 @@ final class StartHeader {
         this.nextHeaderSize = nextHeaderSize;
         this.nextHeaderCrc = nextHeaderCrc;
     }
+
+    long getNextHeaderPosition() {
+        return SevenZFile.SIGNATURE_HEADER_SIZE + nextHeaderOffset;
+    }
 }
diff --git 
a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
 
b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
index fea72af11..15349a29c 100644
--- 
a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
+++ 
b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java
@@ -29,9 +29,7 @@
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
-import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -1091,18 +1089,16 @@ void testReadingBackLZMA2DictSize() throws Exception {
     @ParameterizedTest
     @MethodSource
     void testReadRealUint64_Invalid(final byte[] input) throws IOException {
-        try (DataInputStream dis = new DataInputStream(new 
ByteArrayInputStream(input))) {
-            assertThrows(IOException.class, () -> 
SevenZFile.readRealUint64(dis));
-        }
+        final ByteBuffer buf = 
ByteBuffer.wrap(input).order(ByteOrder.LITTLE_ENDIAN);
+        assertThrows(IOException.class, () -> SevenZFile.readRealUint64(buf));
     }
 
     @ParameterizedTest
     @MethodSource
     void testReadRealUint64_Valid(final byte[] input, final long expected) 
throws IOException {
-        try (DataInputStream dis = new DataInputStream(new 
ByteArrayInputStream(input))) {
-            final long actual = SevenZFile.readRealUint64(dis);
-            assertEquals(expected, actual);
-        }
+        final ByteBuffer buf = 
ByteBuffer.wrap(input).order(ByteOrder.LITTLE_ENDIAN);
+        final long actual = SevenZFile.readRealUint64(buf);
+        assertEquals(expected, actual);
     }
 
     @Test
@@ -1134,10 +1130,6 @@ void testReadTimesFromFile() throws IOException {
     @ParameterizedTest
     @MethodSource
     void testReadUint32_Valid(final byte[] input, final long expected) throws 
IOException {
-        try (DataInputStream dis = new DataInputStream(new 
ByteArrayInputStream(input))) {
-            final long actual = SevenZFile.readUint32(dis);
-            assertEquals(expected, actual);
-        }
         final ByteBuffer buf = 
ByteBuffer.wrap(input).order(ByteOrder.LITTLE_ENDIAN);
         final long actual = SevenZFile.readUint32(buf);
         assertEquals(expected, actual);

Reply via email to