This is an automated email from the ASF dual-hosted git repository. pkarwasz pushed a commit to branch fix/7z-header-loading in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit 8edc1fc7787f79d56e2c2048fcfc712de97d8332 Author: Piotr P. Karwasz <[email protected]> AuthorDate: Wed Oct 15 16:07:00 2025 +0200 7z: improve header loading --- .../compress/archivers/sevenz/SevenZFile.java | 151 ++++++++------------- .../compress/archivers/sevenz/StartHeader.java | 4 + .../compress/archivers/sevenz/SevenZFileTest.java | 18 +-- 3 files changed, 67 insertions(+), 106 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java index cdd6e1977..803983793 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -20,7 +20,6 @@ import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; -import java.io.DataInputStream; import java.io.EOFException; import java.io.File; import java.io.FilterInputStream; @@ -28,7 +27,7 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.nio.channels.Channels; +import java.nio.channels.FileChannel; import java.nio.channels.SeekableByteChannel; import java.util.ArrayList; import java.util.Arrays; @@ -39,7 +38,6 @@ import java.util.Map; import java.util.Objects; import java.util.zip.CRC32; -import java.util.zip.CheckedInputStream; import org.apache.commons.compress.MemoryLimitException; import org.apache.commons.compress.archivers.AbstractArchiveBuilder; @@ -437,6 +435,14 @@ private static ByteBuffer ensureRemaining(final ByteBuffer header, final long ex return header; } + private static long computeChecksum(final ByteBuffer header) { + final int currentPosition = header.position(); + final CRC32 crc = new CRC32(); + crc.update(header); + header.position(currentPosition); + return crc.getValue(); + } + /** * Wrapper of {@link ByteBuffer#get(byte[])} that checks remaining bytes first. */ @@ -492,14 +498,14 @@ static int readFieldSize(final ByteBuffer header) throws ArchiveException { } /** - * Reads a 7z REAL_UINT64 from the stream. + * Reads a 7z REAL_UINT64 from the header. * - * @param inputStream the input stream containing the 7z header. + * @param header the buffer containing the 7z header. * @return a non-negative long. * @throws ArchiveException if the value is truncated or too large. */ - static long readRealUint64(final DataInputStream inputStream) throws IOException { - final long value = Long.reverseBytes(inputStream.readLong()); + static long readRealUint64(final ByteBuffer header) throws IOException { + final long value = header.getLong(); if (value < 0) { throw new ArchiveException("7z archive: Unsupported, cannot handle integer larger then %d, but was %s", Integer.MAX_VALUE, Long.toUnsignedString(value)); @@ -518,18 +524,6 @@ static long readUint32(final ByteBuffer header) throws ArchiveException { return Integer.toUnsignedLong(getInt(header)); } - - /** - * Reads a 7z UINT32 from the stream. - * - * @param inputStream the input stream containing the 7z header. - * @return a non-negative long. - * @throws ArchiveException if the value is truncated. - */ - static long readUint32(final DataInputStream inputStream) throws IOException { - return Integer.toUnsignedLong(Integer.reverseBytes(inputStream.readInt())); - } - /** * Reads a 7z UINT64 from the header. * @@ -1251,34 +1245,24 @@ private boolean hasCurrentEntryBeenRead() { } private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { - MemoryLimitException.checkKiB(bytesToKiB(startHeader.nextHeaderSize), Math.min(bytesToKiB(org.apache.commons.io.IOUtils.SOFT_MAX_ARRAY_LENGTH), - maxMemoryLimitKiB)); - channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); + Archive archive = new Archive(); + ByteBuffer header = mapNextHeader(startHeader); if (verifyCrc) { - final long position = channel.position(); - final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); - if (cis.skip(startHeader.nextHeaderSize) != startHeader.nextHeaderSize) { - throw new ArchiveException("Problem computing NextHeader CRC-32"); + if (startHeader.nextHeaderCrc != computeChecksum(header)) { + throw new ArchiveException("Corrupted 7z archive: CRC error in next header"); } - if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { - throw new ArchiveException("NextHeader CRC-32 mismatch"); -} - channel.position(position); } - Archive archive = new Archive(); - ByteBuffer buf = ByteBuffer.allocate(startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN); - readFully(buf); - int nid = getUnsignedByte(buf); + int nid = getUnsignedByte(header); if (nid == NID.kEncodedHeader) { - buf = readEncodedHeader(buf, archive, password); + header = readEncodedHeader(header, archive, password); // Archive gets rebuilt with the new header archive = new Archive(); - nid = getUnsignedByte(buf); + nid = getUnsignedByte(header); } if (nid != NID.kHeader) { throw new ArchiveException("7z archive: Broken or unsupported, no Header"); } - readHeader(buf, archive); + readHeader(header, archive); archive.subStreamsInfo = null; return archive; } @@ -1307,6 +1291,20 @@ private long[] longArray(final int size) throws MemoryLimitException { return new long[size]; } + private ByteBuffer mapNextHeader(final StartHeader startHeader) throws IOException { + MemoryLimitException.checkKiB(bytesToKiB(startHeader.nextHeaderSize), Math.min(bytesToKiB(org.apache.commons.io.IOUtils.SOFT_MAX_ARRAY_LENGTH), + maxMemoryLimitKiB)); + if (channel instanceof FileChannel) { + final FileChannel fileChannel = (FileChannel) channel; + return fileChannel.map(FileChannel.MapMode.READ_ONLY, startHeader.getNextHeaderPosition(), startHeader.nextHeaderSize) + .order(ByteOrder.LITTLE_ENDIAN); + } + channel.position(startHeader.getNextHeaderPosition()); + final ByteBuffer buf = ByteBuffer.allocate(startHeader.nextHeaderSize).order(ByteOrder.LITTLE_ENDIAN); + readFully(buf); + return buf; + } + /** * Reads a byte of data. * @@ -1411,7 +1409,7 @@ private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive arch // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? final Folder folder = archive.folders[0]; final int firstPackStreamIndex = 0; - final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 0; + final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos; channel.position(folderOffset); InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]); for (final Coder coder : folder.getOrderedCoders()) { @@ -1680,7 +1678,7 @@ Folder readFolder(final ByteBuffer header) throws IOException { private void readFully(final ByteBuffer buf) throws IOException { buf.rewind(); - IOUtils.readFully(channel, buf); + org.apache.commons.io.IOUtils.read(channel, buf); buf.flip(); } @@ -1709,40 +1707,24 @@ private void readHeader(final ByteBuffer header, final Archive archive) throws I } private Archive readHeaders(final byte[] password) throws IOException { - final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */).order(ByteOrder.LITTLE_ENDIAN); - readFully(buf); + final ByteBuffer startHeader = ByteBuffer.allocate(SIGNATURE_HEADER_SIZE).order(ByteOrder.LITTLE_ENDIAN); + readFully(startHeader); final byte[] signature = new byte[6]; - buf.get(signature); + startHeader.get(signature); if (!Arrays.equals(signature, SIGNATURE)) { throw new ArchiveException("Bad 7z signature"); } // 7zFormat.txt has it wrong - it's first major then minor - final byte archiveVersionMajor = buf.get(); - final byte archiveVersionMinor = buf.get(); + final byte archiveVersionMajor = startHeader.get(); + final byte archiveVersionMinor = startHeader.get(); if (archiveVersionMajor != 0) { throw new ArchiveException("7z archive: Unsupported 7z version (%d,%d)", archiveVersionMajor, archiveVersionMinor); } - boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" - final long startHeaderCrc = readUint32(buf); - if (startHeaderCrc == 0) { - // This is an indication of a corrupt header - peek the next 20 bytes - final long currentPosition = channel.position(); - final ByteBuffer peekBuf = ByteBuffer.allocate(20); - readFully(peekBuf); - channel.position(currentPosition); - // Header invalid if all data is 0 - while (peekBuf.hasRemaining()) { - if (peekBuf.get() != 0) { - headerLooksValid = true; - break; - } - } - } else { - headerLooksValid = true; - } - if (headerLooksValid) { - return initializeArchive(readStartHeader(startHeaderCrc), password, true); + final long startHeaderCrc = readUint32(startHeader); + if (startHeaderCrc == computeChecksum(startHeader)) { + return initializeArchive(readStartHeader(startHeader), password, true); } + // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" // No valid header found - probably first file of multipart archive was removed too early. Scan for end header. if (tryToRecoverBrokenArchives) { return tryToLocateEndHeader(password); @@ -1777,27 +1759,17 @@ private void readPackInfo(final ByteBuffer header, final Archive archive) throws } } - private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { - // using Stream rather than ByteBuffer for the benefit of the built-in CRC check - try (DataInputStream dataInputStream = new DataInputStream(ChecksumInputStream.builder() - // @formatter:off - .setChecksum(new CRC32()) - .setInputStream(new BoundedSeekableByteChannelInputStream(channel, 20)) - .setCountThreshold(20L) - .setExpectedChecksumValue(startHeaderCrc) - .get())) { - // @formatter:on - final long nextHeaderOffset = readRealUint64(dataInputStream); - if (nextHeaderOffset > channel.size() - SIGNATURE_HEADER_SIZE) { - throw new ArchiveException("nextHeaderOffset is out of bounds"); - } - final int nextHeaderSize = toNonNegativeInt("nextHeaderSize", readRealUint64(dataInputStream)); - if (nextHeaderSize > channel.size() - SIGNATURE_HEADER_SIZE - nextHeaderOffset) { - throw new ArchiveException("nextHeaderSize is out of bounds"); - } - final long nextHeaderCrc = readUint32(dataInputStream); - return new StartHeader(nextHeaderOffset, nextHeaderSize, nextHeaderCrc); + private StartHeader readStartHeader(final ByteBuffer startHeader) throws IOException { + final long nextHeaderOffset = readRealUint64(startHeader); + if (nextHeaderOffset > channel.size() - SIGNATURE_HEADER_SIZE) { + throw new ArchiveException("nextHeaderOffset is out of bounds"); + } + final int nextHeaderSize = toNonNegativeInt("startHeader.nextHeaderSize", readRealUint64(startHeader)); + if (nextHeaderSize > channel.size() - SIGNATURE_HEADER_SIZE - nextHeaderOffset) { + throw new ArchiveException("nextHeaderSize is out of bounds"); } + final long nextHeaderCrc = readUint32(startHeader); + return new StartHeader(nextHeaderOffset, nextHeaderSize, nextHeaderCrc); } private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { @@ -2383,15 +2355,8 @@ public String toString() { private Archive tryToLocateEndHeader(final byte[] password) throws IOException { final ByteBuffer nidBuf = ByteBuffer.allocate(1); final long searchLimit = 1024L * 1024 * 1; - // Main header, plus bytes that readStartHeader would read - final long previousDataSize = channel.position() + 20; - final long minPos; // Determine minimal position - can't start before current position - if (channel.position() + searchLimit > channel.size()) { - minPos = channel.position(); - } else { - minPos = channel.size() - searchLimit; - } + final long minPos = Math.max(channel.position(), channel.size() - searchLimit); long pos = channel.size() - 1; // Loop: Try from end of archive while (pos > minPos) { @@ -2406,7 +2371,7 @@ private Archive tryToLocateEndHeader(final byte[] password) throws IOException { if (nid == NID.kEncodedHeader || nid == NID.kHeader) { try { // Try to initialize Archive structure from here - final long nextHeaderOffset = pos - previousDataSize; + final long nextHeaderOffset = pos - SIGNATURE_HEADER_SIZE; // Smaller than 1 MiB, so fits in an int final long nextHeaderSize = channel.size() - pos; final StartHeader startHeader = new StartHeader(nextHeaderOffset, (int) nextHeaderSize, 0); diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java index bf7212fe1..733db5a71 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java @@ -33,4 +33,8 @@ final class StartHeader { this.nextHeaderSize = nextHeaderSize; this.nextHeaderCrc = nextHeaderCrc; } + + long getNextHeaderPosition() { + return SevenZFile.SIGNATURE_HEADER_SIZE + nextHeaderOffset; + } } diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java index fea72af11..15349a29c 100644 --- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java @@ -29,9 +29,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -1091,18 +1089,16 @@ void testReadingBackLZMA2DictSize() throws Exception { @ParameterizedTest @MethodSource void testReadRealUint64_Invalid(final byte[] input) throws IOException { - try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(input))) { - assertThrows(IOException.class, () -> SevenZFile.readRealUint64(dis)); - } + final ByteBuffer buf = ByteBuffer.wrap(input).order(ByteOrder.LITTLE_ENDIAN); + assertThrows(IOException.class, () -> SevenZFile.readRealUint64(buf)); } @ParameterizedTest @MethodSource void testReadRealUint64_Valid(final byte[] input, final long expected) throws IOException { - try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(input))) { - final long actual = SevenZFile.readRealUint64(dis); - assertEquals(expected, actual); - } + final ByteBuffer buf = ByteBuffer.wrap(input).order(ByteOrder.LITTLE_ENDIAN); + final long actual = SevenZFile.readRealUint64(buf); + assertEquals(expected, actual); } @Test @@ -1134,10 +1130,6 @@ void testReadTimesFromFile() throws IOException { @ParameterizedTest @MethodSource void testReadUint32_Valid(final byte[] input, final long expected) throws IOException { - try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(input))) { - final long actual = SevenZFile.readUint32(dis); - assertEquals(expected, actual); - } final ByteBuffer buf = ByteBuffer.wrap(input).order(ByteOrder.LITTLE_ENDIAN); final long actual = SevenZFile.readUint32(buf); assertEquals(expected, actual);
