(commons-compress) branch master updated: fix: Restore incremental file name handling (#698)

ggregory Wed, 27 Aug 2025 04:09:32 -0700

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git



The following commit(s) were added to refs/heads/master by this push:
     new 7ced7a3b7 fix: Restore incremental file name handling (#698)
7ced7a3b7 is described below

commit 7ced7a3b757bfd7288edc9908236db522ec9f254
Author: Piotr P. Karwasz <pi...@github.copernik.eu>
AuthorDate: Wed Aug 27 13:07:19 2025 +0200

    fix: Restore incremental file name handling (#698)
    
    * fix: Restore incremental file name handling
    
    The refactoring in #684 (never released) inadvertently dropped a critical 
part of long file name handling: names must be read incrementally to prevent 
excessive memory allocation if an entry is corrupted.
    
    This change restores the previous safe behavior and adds two unit tests to 
ensure:
    
    * POSIX and NTFS file names up to the maximum allowed length are handled 
correctly.
    * Corrupted entries are detected and throw an `ArchiveException`.
    
    * fix: Improve error messages and coverage
    
    * fix: use locale-specific integer formatting
    
    * fix: use default UnsynchronizedBAOS buffer size
    
    * fix: prefix test cases with `test`
    
    * fix: simplify test file name construction
---
 .../commons/compress/archivers/tar/TarUtils.java   |  32 ++++--
 .../compress/archivers/tar/TarUtilsTest.java       | 124 +++++++++++++++++++++
 2 files changed, 145 insertions(+), 11 deletions(-)

diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
index 792fb5a74..3938ce2f3 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
@@ -39,7 +39,9 @@
 import org.apache.commons.compress.utils.ArchiveUtils;
 import org.apache.commons.compress.utils.IOUtils;
 import org.apache.commons.compress.utils.ParsingUtils;
+import org.apache.commons.io.input.BoundedInputStream;
 import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
 
 /**
  * This class provides static utility methods to work with byte streams.
@@ -886,22 +888,30 @@ private static long[] readLineOfNumberForPax1x(final 
InputStream inputStream) th
      * @throws IOException if an I/O error occurs or the entry is truncated.
      * @throws ArchiveException if the entry size is invalid.
      */
-    private static String readLongName(final InputStream input, final 
ZipEncoding encoding, final TarArchiveEntry entry)
+    static String readLongName(final InputStream input, final ZipEncoding 
encoding, final TarArchiveEntry entry)
             throws IOException {
         final long size = entry.getSize();
+        // The encoding requires a byte array, whose size must be a positive 
int.
         if (size > Integer.MAX_VALUE) {
-            throw new ArchiveException("Invalid long name size: " + 
entry.getSize());
-        }
-        final int sizeInt = (int) size;
-        final byte[] buffer = new byte[sizeInt];
-        if (IOUtils.readFully(input, buffer, 0, sizeInt) < sizeInt) {
-            throw new ArchiveException("TAR entry is truncated.");
-        }
-        int length = buffer.length;
-        while (length > 0 && buffer[length - 1] == 0) {
+            throw new ArchiveException("Invalid long name entry: size %,d 
exceeds maximum allowed.", entry.getSize());
+        }
+        // Read the long name incrementally to limit memory allocation in case 
of a corrupted entry.
+        final BoundedInputStream boundedInput = BoundedInputStream.builder()
+                .setInputStream(input)
+                .setMaxCount(size)
+                .setPropagateClose(false)
+                .get();
+        final UnsynchronizedByteArrayOutputStream outputStream = 
UnsynchronizedByteArrayOutputStream.builder().get();
+        final long read = 
org.apache.commons.io.IOUtils.copyLarge(boundedInput, outputStream);
+        if (read != size) {
+            throw new ArchiveException("Truncated long name entry: expected 
%,d bytes, read %,d bytes.", size, read);
+        }
+        final byte[] name = outputStream.toByteArray();
+        int length = name.length;
+        while (length > 0 && name[length - 1] == 0) {
             length--;
         }
-        return encoding.decode(Arrays.copyOf(buffer, length));
+        return encoding.decode(Arrays.copyOf(name, length));
     }
 
     /**
diff --git 
a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java 
b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
index f1ac9593f..4a19b3c45 100644
--- a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java
@@ -22,6 +22,7 @@
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -31,17 +32,24 @@
 import java.io.UncheckedIOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Stream;
 
 import org.apache.commons.compress.AbstractTest;
 import org.apache.commons.compress.archivers.ArchiveException;
 import org.apache.commons.compress.archivers.zip.ZipEncoding;
 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
 import org.apache.commons.compress.utils.ByteUtils;
+import org.apache.commons.io.input.NullInputStream;
+import org.apache.commons.lang3.StringUtils;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 class TarUtilsTest extends AbstractTest {
 
@@ -599,4 +607,120 @@ void testWriteNegativeBinary8Byte() {
         assertEquals(-3601L, TarUtils.parseOctalOrBinary(b, 0, 8));
     }
 
+    /**
+     * Builds an NTFS-style path (\\?\C:\...) up to a target total UTF-16 
length, respecting 255-unit segments.
+     */
+    private static String createNtfsLongNameByUtf16Units(final int totalUnits) 
{
+        final String prefix = "\\\\?\\C:\\";
+        final String extension = ".txt";
+
+        // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => lets us 
pack 255 units per segment easily
+        final String segment = StringUtils.repeat("★", 255) + '\\';
+
+        final StringBuilder sb = new StringBuilder(prefix);
+        while (sb.length() + extension.length() < totalUnits) {
+            sb.append(segment);
+        }
+
+        // Trim to exact totalUnits (UTF-16 units), then append extension
+        sb.setLength(totalUnits - extension.length());
+        sb.append(extension);
+        assertEquals(totalUnits, sb.length(), "Final length should be " + 
totalUnits + " UTF-16 code units");
+        return sb.toString();
+    }
+
+    /**
+     * Builds a POSIX-style path (rooted at `/`) up to a target total *byte* 
length in UTF-8, 255 bytes/segment.
+     */
+    private static String createPosixLongNameByUtf8Bytes(final int totalBytes) 
{
+        final String extension = ".txt";
+        // U+2605 BLACK STAR (BMP, 1 UTF-16 unit, 3 UTF-8 bytes) => 85 * 3 
UTF-8 bytes = 255 bytes
+        final String segment = StringUtils.repeat("★", 85) + '/';
+        assertEquals(256, utf8Len(segment), "Segment length with separator 
should be 256 bytes in UTF-8");
+
+        final StringBuilder sb = new StringBuilder();
+        int count = totalBytes / 256; // how many full 256-byte chunks can we 
fit?
+        while (count-- > 0) {
+            sb.append(segment);
+        }
+        count = totalBytes - utf8Len(sb) - utf8Len(extension);
+        while (count-- > 0) {
+            sb.append('a');
+        }
+        sb.append(extension);
+        assertEquals(totalBytes, utf8Len(sb), "Final length should be " + 
totalBytes + " bytes in UTF-8");
+        return sb.toString();
+    }
+
+    private static int utf8Len(final CharSequence s) {
+        return s.toString().getBytes(UTF_8).length;
+    }
+
+    private static byte[] utf8Bytes(final String s) {
+        return s.getBytes(UTF_8);
+    }
+
+    private static byte[] paddedUtf8Bytes(final String s) {
+        final int blockSize = 1024;
+        final byte[] bytes = s.getBytes(UTF_8);
+        return Arrays.copyOf(bytes, ((bytes.length + blockSize - 1) / 
blockSize) * blockSize);
+    }
+
+    static Stream<Arguments> testReadLongNameHandlesLimits() {
+        final String empty = "";
+        final String ntfsLongName = createNtfsLongNameByUtf16Units(32767);
+        final String posixLongName = createPosixLongNameByUtf8Bytes(4095);
+        return Stream.of(
+                Arguments.of("Empty", empty, utf8Bytes(empty)),
+                Arguments.of("Empty (padded)", empty, paddedUtf8Bytes(empty)),
+                Arguments.of("NTFS", ntfsLongName, utf8Bytes(ntfsLongName)),
+                Arguments.of("NTFS (padded)", ntfsLongName, 
paddedUtf8Bytes(ntfsLongName)),
+                Arguments.of("POSIX", posixLongName, utf8Bytes(posixLongName)),
+                Arguments.of("POSIX (padded)", posixLongName, 
paddedUtf8Bytes(posixLongName)));
+    }
+
+    @ParameterizedTest(name = "{0} long name is read correctly")
+    @MethodSource
+    void testReadLongNameHandlesLimits(final String kind, final String 
expectedName, final byte[] data) throws IOException {
+        final TarArchiveEntry entry = new TarArchiveEntry("test");
+        entry.setSize(data.length);
+        // Lets add a trailing "garbage" to ensure we only read what we should
+        final byte[] dataWithGarbage = Arrays.copyOf(data, data.length + 1024);
+        Arrays.fill(dataWithGarbage, data.length, dataWithGarbage.length, 
(byte) 0xFF);
+
+        try (InputStream in = new ByteArrayInputStream(dataWithGarbage)) {
+            final String actualName = TarUtils.readLongName(in, 
ZipEncodingHelper.getZipEncoding(UTF_8), entry);
+            assertEquals(
+                    expectedName,
+                    actualName,
+                    () -> String.format("[%s] The long name read does not 
match the expected value.", kind));
+        }
+    }
+
+    static Stream<Arguments> testReadLongNameThrowsOnTruncation() {
+        return Stream.of(
+                Arguments.of(Integer.MAX_VALUE, "truncated long name"),
+                Arguments.of(Long.MAX_VALUE, "invalid long name"));
+    }
+
+    @ParameterizedTest(name = "readLongName of {0} bytes throws 
ArchiveException")
+    @MethodSource
+    void testReadLongNameThrowsOnTruncation(final long size, final 
CharSequence expectedMessage) throws IOException {
+        final TarArchiveEntry entry = new TarArchiveEntry("test");
+        entry.setSize(size); // absurdly large so any finite stream truncates
+        try (InputStream in = new NullInputStream()) {
+            final ArchiveException ex = assertThrows(
+                    ArchiveException.class,
+                    () -> TarUtils.readLongName(in, TarUtils.DEFAULT_ENCODING, 
entry),
+                    "Expected ArchiveException due to truncated long name, but 
no exception was thrown");
+            final String actualMessage = 
StringUtils.toRootLowerCase(ex.getMessage());
+            assertNotNull(actualMessage, "Exception message should not be 
null");
+            assertTrue(
+                    actualMessage.contains(expectedMessage),
+                    () -> "Expected exception message to contain '" + 
expectedMessage + "', but got: " + actualMessage);
+            assertTrue(
+                    actualMessage.contains(String.format("%,d", size)),
+                    () -> "Expected exception message to mention '" + size + 
"', but got: " + actualMessage);
+        }
+    }
 }

(commons-compress) branch master updated: fix: Restore incremental file name handling (#698)

Reply via email to