This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push: new 7ced7a3b7 fix: Restore incremental file name handling (#698) 7ced7a3b7 is described below commit 7ced7a3b757bfd7288edc9908236db522ec9f254 Author: Piotr P. Karwasz <pi...@github.copernik.eu> AuthorDate: Wed Aug 27 13:07:19 2025 +0200 fix: Restore incremental file name handling (#698) * fix: Restore incremental file name handling The refactoring in #684 (never released) inadvertently dropped a critical part of long file name handling: names must be read incrementally to prevent excessive memory allocation if an entry is corrupted. This change restores the previous safe behavior and adds two unit tests to ensure: * POSIX and NTFS file names up to the maximum allowed length are handled correctly. * Corrupted entries are detected and throw an `ArchiveException`. * fix: Improve error messages and coverage * fix: use locale-specific integer formatting * fix: use default UnsynchronizedBAOS buffer size * fix: prefix test cases with `test` * fix: simplify test file name construction --- .../commons/compress/archivers/tar/TarUtils.java | 32 ++++-- .../compress/archivers/tar/TarUtilsTest.java | 124 +++++++++++++++++++++ 2 files changed, 145 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java index 792fb5a74..3938ce2f3 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java @@ -39,7 +39,9 @@ import org.apache.commons.compress.utils.ArchiveUtils; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.ParsingUtils; +import org.apache.commons.io.input.BoundedInputStream; import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; /** * This class provides static utility methods to work with byte streams. @@ -886,22 +888,30 @@ private static long[] readLineOfNumberForPax1x(final InputStream inputStream) th * @throws IOException if an I/O error occurs or the entry is truncated. * @throws ArchiveException if the entry size is invalid. */ - private static String readLongName(final InputStream input, final ZipEncoding encoding, final TarArchiveEntry entry) + static String readLongName(final InputStream input, final ZipEncoding encoding, final TarArchiveEntry entry) throws IOException { final long size = entry.getSize(); + // The encoding requires a byte array, whose size must be a positive int. if (size > Integer.MAX_VALUE) { - throw new ArchiveException("Invalid long name size: " + entry.getSize()); - } - final int sizeInt = (int) size; - final byte[] buffer = new byte[sizeInt]; - if (IOUtils.readFully(input, buffer, 0, sizeInt) < sizeInt) { - throw new ArchiveException("TAR entry is truncated."); - } - int length = buffer.length; - while (length > 0 && buffer[length - 1] == 0) { + throw new ArchiveException("Invalid long name entry: size %,d exceeds maximum allowed.", entry.getSize()); + } + // Read the long name incrementally to limit memory allocation in case of a corrupted entry. + final BoundedInputStream boundedInput = BoundedInputStream.builder() + .setInputStream(input) + .setMaxCount(size) + .setPropagateClose(false) + .get(); + final UnsynchronizedByteArrayOutputStream outputStream = UnsynchronizedByteArrayOutputStream.builder().get(); + final long read = org.apache.commons.io.IOUtils.copyLarge(boundedInput, outputStream); + if (read != size) { + throw new ArchiveException("Truncated long name entry: expected %,d bytes, read %,d bytes.", size, read); + } + final byte[] name = outputStream.toByteArray(); + int length = name.length; + while (length > 0 && name[length - 1] == 0) { length--; } - return encoding.decode(Arrays.copyOf(buffer, length)); + return encoding.decode(Arrays.copyOf(name, length)); } /** diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java index f1ac9593f..4a19b3c45 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java @@ -22,6 +22,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -31,17 +32,24 @@ import java.io.UncheckedIOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Stream; import org.apache.commons.compress.AbstractTest; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.archivers.zip.ZipEncoding; import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.io.input.NullInputStream; +import org.apache.commons.lang3.StringUtils; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; class TarUtilsTest extends AbstractTest { @@ -599,4 +607,120 @@ void testWriteNegativeBinary8Byte() { assertEquals(-3601L, TarUtils.parseOctalOrBinary(b, 0, 8)); } + /** + * Builds an NTFS-style path (\\?\C:\...) up to a target total UTF-16 length, respecting 255-unit segments. + */ + private static String createNtfsLongNameByUtf16Units(final int totalUnits) { + final String prefix = "\\\\?\\C:\\"; + final String extension = ".txt"; + + // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => lets us pack 255 units per segment easily + final String segment = StringUtils.repeat("★", 255) + '\\'; + + final StringBuilder sb = new StringBuilder(prefix); + while (sb.length() + extension.length() < totalUnits) { + sb.append(segment); + } + + // Trim to exact totalUnits (UTF-16 units), then append extension + sb.setLength(totalUnits - extension.length()); + sb.append(extension); + assertEquals(totalUnits, sb.length(), "Final length should be " + totalUnits + " UTF-16 code units"); + return sb.toString(); + } + + /** + * Builds a POSIX-style path (rooted at `/`) up to a target total *byte* length in UTF-8, 255 bytes/segment. + */ + private static String createPosixLongNameByUtf8Bytes(final int totalBytes) { + final String extension = ".txt"; + // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => 85 * 3 UTF-8 bytes = 255 bytes + final String segment = StringUtils.repeat("★", 85) + '/'; + assertEquals(256, utf8Len(segment), "Segment length with separator should be 256 bytes in UTF-8"); + + final StringBuilder sb = new StringBuilder(); + int count = totalBytes / 256; // how many full 256-byte chunks can we fit? + while (count-- > 0) { + sb.append(segment); + } + count = totalBytes - utf8Len(sb) - utf8Len(extension); + while (count-- > 0) { + sb.append('a'); + } + sb.append(extension); + assertEquals(totalBytes, utf8Len(sb), "Final length should be " + totalBytes + " bytes in UTF-8"); + return sb.toString(); + } + + private static int utf8Len(final CharSequence s) { + return s.toString().getBytes(UTF_8).length; + } + + private static byte[] utf8Bytes(final String s) { + return s.getBytes(UTF_8); + } + + private static byte[] paddedUtf8Bytes(final String s) { + final int blockSize = 1024; + final byte[] bytes = s.getBytes(UTF_8); + return Arrays.copyOf(bytes, ((bytes.length + blockSize - 1) / blockSize) * blockSize); + } + + static Stream<Arguments> testReadLongNameHandlesLimits() { + final String empty = ""; + final String ntfsLongName = createNtfsLongNameByUtf16Units(32767); + final String posixLongName = createPosixLongNameByUtf8Bytes(4095); + return Stream.of( + Arguments.of("Empty", empty, utf8Bytes(empty)), + Arguments.of("Empty (padded)", empty, paddedUtf8Bytes(empty)), + Arguments.of("NTFS", ntfsLongName, utf8Bytes(ntfsLongName)), + Arguments.of("NTFS (padded)", ntfsLongName, paddedUtf8Bytes(ntfsLongName)), + Arguments.of("POSIX", posixLongName, utf8Bytes(posixLongName)), + Arguments.of("POSIX (padded)", posixLongName, paddedUtf8Bytes(posixLongName))); + } + + @ParameterizedTest(name = "{0} long name is read correctly") + @MethodSource + void testReadLongNameHandlesLimits(final String kind, final String expectedName, final byte[] data) throws IOException { + final TarArchiveEntry entry = new TarArchiveEntry("test"); + entry.setSize(data.length); + // Lets add a trailing "garbage" to ensure we only read what we should + final byte[] dataWithGarbage = Arrays.copyOf(data, data.length + 1024); + Arrays.fill(dataWithGarbage, data.length, dataWithGarbage.length, (byte) 0xFF); + + try (InputStream in = new ByteArrayInputStream(dataWithGarbage)) { + final String actualName = TarUtils.readLongName(in, ZipEncodingHelper.getZipEncoding(UTF_8), entry); + assertEquals( + expectedName, + actualName, + () -> String.format("[%s] The long name read does not match the expected value.", kind)); + } + } + + static Stream<Arguments> testReadLongNameThrowsOnTruncation() { + return Stream.of( + Arguments.of(Integer.MAX_VALUE, "truncated long name"), + Arguments.of(Long.MAX_VALUE, "invalid long name")); + } + + @ParameterizedTest(name = "readLongName of {0} bytes throws ArchiveException") + @MethodSource + void testReadLongNameThrowsOnTruncation(final long size, final CharSequence expectedMessage) throws IOException { + final TarArchiveEntry entry = new TarArchiveEntry("test"); + entry.setSize(size); // absurdly large so any finite stream truncates + try (InputStream in = new NullInputStream()) { + final ArchiveException ex = assertThrows( + ArchiveException.class, + () -> TarUtils.readLongName(in, TarUtils.DEFAULT_ENCODING, entry), + "Expected ArchiveException due to truncated long name, but no exception was thrown"); + final String actualMessage = StringUtils.toRootLowerCase(ex.getMessage()); + assertNotNull(actualMessage, "Exception message should not be null"); + assertTrue( + actualMessage.contains(expectedMessage), + () -> "Expected exception message to contain '" + expectedMessage + "', but got: " + actualMessage); + assertTrue( + actualMessage.contains(String.format("%,d", size)), + () -> "Expected exception message to mention '" + size + "', but got: " + actualMessage); + } + } }