This is an automated email from the ASF dual-hosted git repository. ggregory pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit 1ede093532a13562f751837f27d2f84d57a40c37 Author: Gary D. Gregory <garydgreg...@gmail.com> AuthorDate: Tue Aug 19 07:37:36 2025 -0400 Add org.apache.commons.compress.archivers.tar.TarArchiveInputStream.Builder - Add org.apache.commons.compress.archivers.tar.TarArchiveInputStream.builder() --- src/changes/changes.xml | 2 + .../archivers/tar/TarArchiveInputStream.java | 94 ++++++++++++++++++++++ .../archivers/tar/TarArchiveInputStreamTest.java | 60 +++++++++++++- 3 files changed, 152 insertions(+), 4 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index b69ccf1c0..bae92e27a 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -113,6 +113,8 @@ The <action> type attribute can be add,update,fix,remove. <action type="add" issue="COMPRESS-705" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream.Builder.setIgnoreExtraField(boolean).</action> <action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream.getUncompressedSize() and deprecate getSize().</action> <action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.compress.archivers.ArchiveInputStream.ArchiveInputStream(InputStream, Charset) as a public constructor, it was private.</action> + <action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.compress.archivers.tar.TarArchiveInputStream.builder().</action> + <action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.compress.archivers.tar.TarArchiveInputStream.Builder.</action> <!-- UPDATE --> </release> <release version="1.28.0" date="2025-07-26" description="This is a feature and maintenance release. 
Java 8 or later is required."> diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java index d0f7d471c..65d2fc71e 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java @@ -39,8 +39,10 @@ import org.apache.commons.compress.archivers.ArchiveInputStream; import org.apache.commons.compress.archivers.zip.ZipEncoding; import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.commons.compress.utils.ArchiveUtils; import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.io.build.AbstractStreamBuilder; import org.apache.commons.io.input.BoundedInputStream; /** @@ -51,6 +53,79 @@ */ public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> { + // @formatter:off + /** + * Builds a new {@link TarArchiveInputStream}. + * + * <p> + * For example: + * </p> + * <pre>{@code + * TarArchiveInputStream s = TarArchiveInputStream.builder() + * .setPath(path) + * .setLenient(true) + * .setCharset(StandardCharsets.UTF_8) + * .get();} + * </pre> + * + * @see #get() + * @since 1.29.0 + */ + // @formatter:on + public static class Builder extends AbstractStreamBuilder<TarArchiveInputStream, Builder> { + + private int blockSize = TarConstants.DEFAULT_BLKSIZE; + private int recordSize = TarConstants.DEFAULT_RCDSIZE; + private boolean lenient; + + /** + * Constructs a new instance. + */ + public Builder() { + // empty + } + + @Override + public TarArchiveInputStream get() throws IOException { + return new TarArchiveInputStream(this); + } + + /** + * Sets the block size. + * + * @param blockSize the block size. + * @return {@code this} instance. 
+ */ + public Builder setBlockSize(final int blockSize) { + this.blockSize = blockSize; + return this; + } + + /** + * Sets whether illegal values for group/userid, mode, device numbers and timestamp will be ignored and the fields set to + * {@link TarArchiveEntry#UNKNOWN}. When set to false such illegal fields cause an exception instead. + * + * @param lenient whether illegal values are ignored instead of causing an exception. + * @return {@code this} instance. + */ + public Builder setLenient(final boolean lenient) { + this.lenient = lenient; + return this; + } + + /** + * Sets the record size. + * + * @param recordSize the record size. + * @return {@code this} instance. + */ + public Builder setRecordSize(final int recordSize) { + this.recordSize = recordSize; + return this; + } + + } + /** * IBM AIX <a href=""https://www.ibm.com/docs/sv/aix/7.2.0?topic=files-tarh-file">tar.h</a>: "This field is terminated with a space only." */ @@ -58,6 +133,16 @@ public class TarArchiveInputStream extends ArchiveInputStream<TarArchiveEntry> { private static final int SMALL_BUFFER_SIZE = 256; + /** + * Creates a new builder. + * + * @return a new builder. + * @since 1.29.0 + */ + public static Builder builder() { + return new Builder(); + } + /** * Checks if the signature matches what is expected for a tar file. * @@ -129,6 +214,15 @@ public static boolean matches(final byte[] signature, final int length) { private final boolean lenient; + @SuppressWarnings("resource") // caller closes. + private TarArchiveInputStream(final Builder builder) throws IOException { + super(builder.getInputStream(), builder.getCharset()); + this.zipEncoding = ZipEncodingHelper.getZipEncoding(builder.getCharset()); + this.recordBuffer = new byte[builder.recordSize]; + this.blockSize = builder.blockSize; + this.lenient = builder.lenient; + } + /** * Constructs a new instance. 
* diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java index 147356a9b..0dcaef850 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStreamTest.java @@ -63,6 +63,7 @@ import org.apache.commons.compress.archivers.ArchiveStreamFactory; import org.apache.commons.io.IOUtils; import org.apache.commons.io.function.IOConsumer; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -150,9 +151,13 @@ private void testCompress666(final int factor, final boolean bufferInputStream, final List<Future<?>> tasks = IntStream.range(0, 200).mapToObj(index -> executorService.submit(() -> { TarArchiveEntry tarEntry = null; try (InputStream inputStream = getClass().getResourceAsStream(localPath); - TarArchiveInputStream tarInputStream = new TarArchiveInputStream( - bufferInputStream ? new BufferedInputStream(new GZIPInputStream(inputStream)) : new GZIPInputStream(inputStream), - TarConstants.DEFAULT_RCDSIZE * factor, TarConstants.DEFAULT_RCDSIZE)) { + // @formatter:off + TarArchiveInputStream tarInputStream = TarArchiveInputStream.builder() + .setInputStream(bufferInputStream ? new BufferedInputStream(new GZIPInputStream(inputStream)) : new GZIPInputStream(inputStream)) + .setBlockSize(TarConstants.DEFAULT_RCDSIZE * factor) + .setRecordSize(TarConstants.DEFAULT_RCDSIZE) + .get()) { + // @formatter:on while ((tarEntry = tarInputStream.getNextEntry()) != null) { assertNotNull(tarEntry); } @@ -277,7 +282,7 @@ void testGetAndSetOfPaxEntry() throws Exception { * Depending on your setup, this test may need a small stack size {@code -Xss256k}. 
*/ @Test - void testGetNextTarEntry() throws IOException { + void testGetNextEntry() throws IOException { try (TarArchiveInputStream inputStream = new TarArchiveInputStream( Files.newInputStream(Paths.get("src/test/resources/org/apache/commons/compress/tar/getNextTarEntry.bin")))) { final AtomicLong count = new AtomicLong(); @@ -311,6 +316,44 @@ void testGetNextTarEntry() throws IOException { } } + /** + * Depending on your setup, this test may need a small stack size {@code -Xss256k}. + */ + @Test + void testGetNextTarEntryDeprecated() throws IOException { + try (TarArchiveInputStream inputStream = new TarArchiveInputStream( + Files.newInputStream(Paths.get("src/test/resources/org/apache/commons/compress/tar/getNextTarEntry.bin")))) { + final AtomicLong count = new AtomicLong(); + final TarArchiveEntry entry = inputStream.getNextTarEntry(); + assertNull(entry.getCreationTime()); + assertNull(entry.getLastAccessTime()); + assertEquals(new Date(0), entry.getLastModifiedDate()); + assertEquals(FileTime.fromMillis(0), entry.getLastModifiedTime()); + assertNull(entry.getStatusChangeTime()); + assertEquals(-1, entry.getDataOffset()); + assertEquals(0, entry.getDevMajor()); + assertEquals(0, entry.getDevMinor()); + assertEquals(0, entry.getDirectoryEntries().length); + assertEquals(0, entry.getExtraPaxHeaders().size()); + assertEquals(0, entry.getOrderedSparseHeaders().size()); + assertEquals(0, entry.getSparseHeaders().size()); + assertNull(entry.getFile()); + assertNull(entry.getPath()); + assertEquals("", entry.getGroupName()); + assertEquals(0x1ff, entry.getMode()); + assertEquals("", entry.getName()); + assertEquals(0, entry.getRealSize()); + assertEquals(0, entry.getSize()); + assertEquals("", entry.getUserName()); + assertEquals("", entry.getLinkName()); + assertEquals(0x30, entry.getLinkFlag()); + assertEquals(0, entry.getLongGroupId()); + assertEquals(0, entry.getLongUserId()); + inputStream.forEach(e -> count.incrementAndGet()); + assertEquals(0, count.get()); 
+ } + } + @Test void testMultiByteReadConsistentlyReturnsMinusOneAtEof() throws Exception { final byte[] buf = new byte[2]; @@ -347,6 +390,15 @@ void testParseTarWithNonNumberPaxHeaders() throws IOException { } } + @Test + @Disabled + void testChecksum() throws IOException { + try (InputStream in = newInputStream("org/apache/commons/compress/COMPRESS-707/COMPRESS-707.tar"); + TarArchiveInputStream archive = TarArchiveInputStream.builder().setInputStream(in).setLenient(true).get()) { + archive.getNextEntry(); + } + } + @Test void testParseTarWithSpecialPaxHeaders() throws IOException { try (InputStream in = newInputStream("COMPRESS-530-fail.tar");