This is an automated email from the ASF dual-hosted git repository. pkarwasz pushed a commit to branch fix/folder-sizes in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit c40caea86074144b1263dd4d96431c16ffddad0e Author: Piotr P. Karwasz <[email protected]> AuthorDate: Tue Oct 14 22:10:54 2025 +0200 7z: enforce reference limits on `Folder` parsing This change aligns `Folder` parsing with the limits defined in the official 7-Zip implementation ([`7zIn.cpp`](https://github.com/ip7z/7zip/blob/main/CPP/7zip/Archive/7z/7zIn.cpp)): * Maximum coders per folder: **64** * Maximum input streams per coder: **64** * Maximum output streams per coder: **1** * Maximum total input streams per folder: **64** These bounds are consistent with the reference behavior and are safe because: * Other 7z implementations use the same or stricter limits. * No supported coder uses multiple inputs or outputs. * Custom coder definitions are not supported in this implementation. By enforcing these limits, the parser becomes simpler and more predictable, and redundant dynamic size checks can be removed. --- src/changes/changes.xml | 1 + .../commons/compress/archivers/sevenz/Folder.java | 18 ++- .../compress/archivers/sevenz/SevenZFile.java | 78 ++++++++++--- .../compress/archivers/sevenz/SevenZFileTest.java | 129 +++++++++++++++++++++ .../archivers/sevenz/SevenZFolderTest.java | 4 +- 5 files changed, 209 insertions(+), 21 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 0efc712ce..885cf0085 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -56,6 +56,7 @@ The <action> type attribute can be add,update,fix,remove. <action type="fix" dev="ggregory" due-to="Gary Gregory">AES256SHA256Decoder now enforces the CPP source k_NumCyclesPower_Supported_MAX = 24 limit.</action> <action type="fix" dev="ggregory" due-to="Gary Gregory">Don't loose precision while reading folders from a SevenZFile.</action> <action type="fix" dev="ggregory" due-to="Roel van Dijk, Gary Gregory">Improve some exception messages in TarUtils and TarArchiveEntry.</action> + <action type="fix" dev="pkarwasz" due-to="Piotr P. Karwasz">SevenZFile now enforces the same folder and coder limits as the CPP implementation.</action> <!-- FIX bzip2 --> <action type="fix" dev="ggregory" due-to="Tyler Nighswander, Gary Gregory">BZip2CompressorInputStream now throw CompressorException (a subclass of IOException) for invalid or corrupted data, providing more specific error reporting.</action> <action type="fix" dev="pkarwasz" due-to="Tyler Nighswander, Piotr P. Karwasz">BZip2 input streams treat Huffman codes longer than 20 bits as corrupted data, matching the behavior of the reference implementation.</action> diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java index fca70b073..7e3686558 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java @@ -37,13 +37,23 @@ final class Folder { /** * Total number of input streams across all coders. This field is currently unused but technically part of the 7z API. + * + * <p>Currently limited to {@code MAX_CODER_STREAMS_PER_FOLDER}</p> */ - long totalInputStreams; + int totalInputStreams; - /** Total number of output streams across all coders. */ - long totalOutputStreams; + /** + * Total number of output streams across all coders. + * + * <p>Currently limited to {@code MAX_CODER_STREAMS_PER_FOLDER}</p> + */ + int totalOutputStreams; - /** Mapping between input and output streams. */ + /** + * Mapping between input and output streams. + * + * <p>Its size is equal to {@code totalOutputStreams - 1}</p> + */ BindPair[] bindPairs; /** Indices of input streams, one per input stream not listed in bindPairs. */ diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java index 4ebf1c619..72aa29a26 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -348,6 +348,31 @@ public Builder setUseDefaultNameForUnnamedEntries(final boolean useDefaultNameFo */ public static int SOFT_MAX_ARRAY_LENGTH = Integer.MAX_VALUE - 8; + /** + * Maximum number of coders permitted in a single 7z folder. + * + * <p>This limit is defined by the original 7-Zip implementation + * ({@code CPP/7zip/Archive/7z/7zIn.cpp}) to guard against malformed archives:</p> + * + * <pre> + * #define k_Scan_NumCoders_MAX 64 + * </pre> + */ + private static final int MAX_CODERS_PER_FOLDER = 64; + + /** + * Maximum total number of coder input/output streams permitted in a single folder. + * + * <p>This limit is also taken from the reference implementation + * ({@code CPP/7zip/Archive/7z/7zIn.cpp}):</p> + * + * <pre> + * #define k_Scan_NumCodersStreams_in_Folder_MAX 64 + * </pre> + */ + private static final int MAX_CODER_STREAMS_PER_FOLDER = 64; + + /** * Creates a new Builder. * @@ -1437,12 +1462,15 @@ private void readFilesInfo(final ByteBuffer header, final Archive archive) throw calculateStreamMap(archive); } - private Folder readFolder(final ByteBuffer header) throws IOException { + Folder readFolder(final ByteBuffer header) throws IOException { final Folder folder = new Folder(); - final int numCoders = readUint64ToIntExact(header); - final Coder[] coders = new Coder[checkObjectArray(numCoders)]; - long totalInStreams = 0; - long totalOutStreams = 0; + final long numCoders = readUint64(header); + if (numCoders == 0 || numCoders > MAX_CODERS_PER_FOLDER) { + throw new ArchiveException("Unsupported 7z archive: " + numCoders + " coders in folder."); + } + final Coder[] coders = new Coder[(int) numCoders]; + int totalInStreams = 0; + int totalOutStreams = 0; for (int i = 0; i < coders.length; i++) { final int bits = getUnsignedByte(header); final int idSize = bits & 0xf; @@ -1458,10 +1486,19 @@ private Folder readFolder(final ByteBuffer header) throws IOException { numOutStreams = 1; } else { numInStreams = readUint64(header); + if (numInStreams > MAX_CODER_STREAMS_PER_FOLDER) { + throw new ArchiveException("Unsupported 7z archive: %,d coder input streams in folder.", numInStreams); + } numOutStreams = readUint64(header); + if (numOutStreams != 1) { + throw new ArchiveException("Unsupported 7z archive: %,d coder output streams in folder.", numOutStreams); + } + } + totalInStreams += (int) numInStreams; + if (totalInStreams > MAX_CODER_STREAMS_PER_FOLDER) { + throw new ArchiveException("Unsupported 7z archive: %,d coder input streams in folder.", totalInStreams); } - totalInStreams = ArchiveException.addExact(totalInStreams, numInStreams); - totalOutStreams = ArchiveException.addExact(totalOutStreams, numOutStreams); + totalOutStreams += (int) numOutStreams; byte[] properties = null; if (hasAttributes) { final long propertiesSize = readUint64(header); @@ -1470,22 +1507,30 @@ private Folder readFolder(final ByteBuffer header) throws IOException { } // would need to keep looping as above: if (moreAlternativeMethods) { - throw new ArchiveException("Alternative methods are unsupported, please report. The reference implementation doesn't support them either."); + throw new ArchiveException("Unsupported 7z archive: alternative methods are unsupported, please report. " + + "The reference implementation doesn't support them either."); } coders[i] = new Coder(decompressionMethodId, numInStreams, numOutStreams, properties); } folder.coders = coders; folder.totalInputStreams = totalInStreams; folder.totalOutputStreams = totalOutStreams; - final long numBindPairs = totalOutStreams - 1; - final BindPair[] bindPairs = new BindPair[checkObjectArray(ArchiveException.toIntExact(numBindPairs))]; + final int numBindPairs = totalOutStreams - 1; + final BindPair[] bindPairs = new BindPair[numBindPairs]; for (int i = 0; i < bindPairs.length; i++) { - bindPairs[i] = new BindPair(readUint64(header), readUint64(header)); + final long inIndex = readUint64(header); + if (inIndex >= totalInStreams) { + throw new ArchiveException("Unsupported 7z archive: bind pair inIndex %d out of range.", inIndex); + } + final long outIndex = readUint64(header); + if (outIndex >= totalOutStreams) { + throw new ArchiveException("Unsupported 7z archive: bind pair outIndex %d out of range.", inIndex); + } + bindPairs[i] = new BindPair(inIndex, outIndex); } folder.bindPairs = bindPairs; - final long numPackedStreams = totalInStreams - numBindPairs; - final int numPackedStreamsInt = ArchiveException.toIntExact(numPackedStreams); - final long[] packedStreams = new long[checkObjectArray(numPackedStreamsInt)]; + final int numPackedStreams = totalInStreams - numBindPairs; + final long[] packedStreams = new long[numPackedStreams]; if (numPackedStreams == 1) { long i; for (i = 0; i < totalInStreams; i++) { @@ -1495,8 +1540,11 @@ private Folder readFolder(final ByteBuffer header) throws IOException { } packedStreams[0] = i; } else { - for (int i = 0; i < numPackedStreamsInt; i++) { + for (int i = 0; i < numPackedStreams; i++) { packedStreams[i] = readUint64(header); + if (packedStreams[i] >= totalInStreams) { + throw new ArchiveException("Unsupported 7z archive: packed stream index %d out of range.", packedStreams[i]); + } } } folder.packedStreams = packedStreams; diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java index ec3878429..2d19584f4 100644 --- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java @@ -33,6 +33,8 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.attribute.FileTime; @@ -47,7 +49,9 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.function.Consumer; import java.util.function.Function; +import java.util.stream.Stream; import javax.crypto.Cipher; @@ -61,6 +65,8 @@ import org.apache.commons.io.input.ChecksumInputStream; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; class SevenZFileTest extends AbstractArchiveFileTest<SevenZArchiveEntry> { private static final String TEST2_CONTENT = "<?xml version = '1.0'?>\r\n<!DOCTYPE connections>\r\n<meinxml>\r\n\t<leer />\r\n</meinxml>\n"; @@ -838,6 +844,70 @@ void testReadEntriesOfSize0() throws IOException { } } + static Stream<Consumer<ByteBuffer>> testReadFolder_Unsupported() { + return Stream.of( + // Folder with no coders + buf -> writeFolder(buf, new Coder[0]), + // Folder with too many coders + buf -> { + final Coder[] coders = new Coder[65]; + final Coder simpleCoder = new Coder(new byte[] { 0x03 }, 1, 1, null); + Arrays.fill(coders, simpleCoder); + writeFolder(buf, coders); + }, + // Folder with too many input streams per coder + buf -> { + final Coder coder = new Coder(new byte[] { 0x03 }, 65, 1, null); + writeFolder(buf, new Coder[] { coder }); + }, + // Folder with more than one output stream per coder + buf -> { + final Coder coder = new Coder(new byte[] { 0x03 }, 1, 2, null); + writeFolder(buf, new Coder[] { coder }); + }, + // Folder with too many total input streams + buf -> { + final Coder coder = new Coder(new byte[] { 0x03 }, 2, 1, null); + final Coder[] coders = new Coder[33]; + Arrays.fill(coders, coder); + writeFolder(buf, coders); + }, + // Folder with more alternative methods (not supported yet) + buf -> writeFolder(buf, new Coder[]{new Coder(new byte[]{0x03}, 1, 1, null)}, + true, false, false, false), + // Folder with unsupported bind pair in index + buf -> { + final Coder coder = new Coder(new byte[] { 0x03 }, 1, 1, null); + writeFolder(buf, new Coder[] { coder, coder }, false, true, false, false); + }, + // Folder with unsupported bind pair out index + buf -> { + final Coder coder = new Coder(new byte[] { 0x03 }, 1, 1, null); + writeFolder(buf, new Coder[] { coder, coder }, false, false, true, false); + }, + // Folder with unsupported packed stream index + buf -> { + final Coder coder = new Coder(new byte[]{0x03}, 2, 1, null); + writeFolder(buf, new Coder[]{ coder, coder }, false, false, false, true); + } + ); + } + + @ParameterizedTest + @MethodSource + void testReadFolder_Unsupported(final Consumer<ByteBuffer> folderWriter) throws IOException { + try (SevenZFile file = SevenZFile.builder().setURI(getURI("bla.7z")).get()) { + // Allocate a buffer large enough to hold the folder data + final ByteBuffer buffer = ByteBuffer.allocate(8192).order(ByteOrder.LITTLE_ENDIAN); + folderWriter.accept(buffer); + buffer.flip(); + final ArchiveException e = assertThrows(ArchiveException.class, () -> { + file.readFolder(buffer); + }); + assertTrue(e.getMessage().contains("Unsupported 7z archive")); + } + } + /** * Test case for <a href="https://issues.apache.org/jira/browse/COMPRESS-681">COMPRESS-681</a>. */ @@ -1012,4 +1082,63 @@ void testSignatureCheck() { final byte[] data3 = { '7', 'z', (byte) 0xBC, (byte) 0xAF, 0x27, 0x1D }; assertFalse(SevenZFile.matches(data3, data3.length)); } + + private static void writeBindPair(final ByteBuffer buffer, final long inIndex, final long outIndex) { + writeUint64(buffer, inIndex); + writeUint64(buffer, outIndex); + } + + private static void writeCoder(final ByteBuffer buffer, final byte[] methodId, final long numInStreams, final long numOutStreams, + final boolean moreAlternativeMethods) { + final boolean isComplex = numInStreams != 1 || numOutStreams != 1; + int flag = methodId.length; + if (isComplex) { + flag |= 0x10; + } + if (moreAlternativeMethods) { + flag |= 0x80; + } + // coder + buffer.put((byte) flag); + buffer.put(methodId); + if (isComplex) { + writeUint64(buffer, numInStreams); + writeUint64(buffer, numOutStreams); + } + } + + private static void writeFolder(final ByteBuffer buffer, final Coder[] coders, final boolean moreAlternativeMethods, final boolean unsupportedBindPairIn, + final boolean unsupportedBindPairOut, final boolean unsupportedPackedStreams) { + writeUint64(buffer, coders.length); + long totalInStreams = 0; + long totalOutStreams = 0; + for (final Coder coder : coders) { + writeCoder(buffer, coder.decompressionMethodId, coder.numInStreams, coder.numOutStreams, moreAlternativeMethods); + totalInStreams += coder.numInStreams; + totalOutStreams += coder.numOutStreams; + } + long i = 0; + // Bind pairs: one less than number of total out streams + for (; i < totalOutStreams - 1; i++) { + final long inIndex = (unsupportedBindPairIn ? totalInStreams : 0) + i; + final long outIndex = (unsupportedBindPairOut ? totalOutStreams : 0) + i + 1; + writeBindPair(buffer, inIndex, outIndex); + } + // Packed streams: one per in stream that is not bound + if (totalInStreams > i + 1) { + for (; i < totalInStreams; i++) { + final long packedStreamIndex = (unsupportedPackedStreams ? totalInStreams : 0) + i; + writeUint64(buffer, packedStreamIndex); + } + } + } + + private static void writeFolder(final ByteBuffer buffer, final Coder[] coders) { + writeFolder(buffer, coders, false, false, false, false); + } + + private static void writeUint64(final ByteBuffer buffer, final long value) { + buffer.put((byte) 0b1111_1111); + buffer.putLong(value); + } } diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFolderTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFolderTest.java index 721395505..ed2755386 100644 --- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFolderTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFolderTest.java @@ -53,12 +53,12 @@ void testGetUnpackSizeForCoderOne() { @Test void testGetUnpackSizeOne() throws ArchiveException { final Folder folder = new Folder(); - folder.totalOutputStreams = 266L; + folder.totalOutputStreams = 266; final BindPair[] bindPairArray = new BindPair[1]; final BindPair bindPair = new BindPair(0, 0); bindPairArray[0] = bindPair; folder.bindPairs = bindPairArray; - folder.totalOutputStreams = 1L; + folder.totalOutputStreams = 1; assertEquals(0L, folder.getUnpackSize()); }
