This is an automated email from the ASF dual-hosted git repository. pkarwasz pushed a commit to branch feat/archive-file-interface in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit e64e6d219520a16eb9c83f6d76d85ebada0dcbda Author: Piotr P. Karwasz <pkarwasz-git...@apache.org> AuthorDate: Tue Sep 23 13:22:29 2025 +0200 feat: introduce `ArchiveFile` abstraction for file-based archives This PR adds a new `ArchiveFile` interface to unify the handling of file-based archive utilities such as `SevenZFile`, `TarFile`, and `ZipFile`. Although these classes target different archive formats, they share several core characteristics: * All are `Closeable`. * Each provides the same method to open an `InputStream` for a given entry (`InputStream getInputStream(T)` where `T extends ArchiveEntry`). * Historically, their `getEntries()` methods returned incompatible types. This PR introduces a common `List<? extends T> entries()` method, aligning with `java.util.zip.ZipFile` in name but offering a modern return value. * The `ZipFile#stream()` method (added in 1.28.0) is now part of this abstraction. This change establishes a consistent, format-agnostic API for working with archive files, reducing duplication and improving discoverability for users. --- src/changes/changes.xml | 3 +- .../commons/compress/archivers/ArchiveFile.java | 82 ++++++++++++ .../compress/archivers/sevenz/SevenZFile.java | 17 ++- .../commons/compress/archivers/tar/TarFile.java | 17 ++- .../commons/compress/archivers/zip/ZipFile.java | 8 +- .../archivers/AbstractArchiveFileTest.java | 138 +++++++++++++++++++++ .../compress/archivers/sevenz/SevenZFileTest.java | 9 +- .../compress/archivers/tar/TarFileTest.java | 12 +- .../compress/archivers/zip/ZipFileTest.java | 8 +- 9 files changed, 279 insertions(+), 15 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 7f46fc8c5..acf52a01d 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -70,7 +70,7 @@ The <action> type attribute can be add,update,fix,remove. <action type="fix" dev="ggregory" due-to="Tyler Nighswander, Gary Gregory">ZipArchiveInputStream.read() now throws an IOException instead of java.lang.ArrayIndexOutOfBoundsException.</action> <action type="fix" dev="ggregory" due-to="Stanislav Fort, Gary Gregory">ZipArchiveInputStream now throws an MemoryLimitException instead of ArchiveException, or OutOfMemoryError when running with low memory settings set on the command line.</action> <action type="fix" dev="ggregory" due-to="YuWeiBoy, Gary Gregory" issue="COMPRESS-708">ZstdCompressorInputStream closes the InputStream held by ZipArchiveInputStream garbage collection.</action> - <!-- FIX tar --> + <!-- FIX tar --> <action type="fix" dev="pkarwasz" due-to="Tyler Nighswander, Piotr P. Karwasz, Gary Gregory">>Uniform handling of special tar records in TarFile and TarArchiveInputStream.</action> <action type="fix" dev="ggregory" due-to="Gary Gregory, Stanislav Fort">TarArchiveOutputStream now throws a IllegalArgumentException instead of an OutOfMemoryError.</action> <action type="fix" dev="ggregory" issue="COMPRESS-707" due-to="Gary Gregory, Roel van Dijk">TarUtils.verifyCheckSum() throws an Exception when checksum could not be parsed.</action> @@ -123,6 +123,7 @@ The <action> type attribute can be add,update,fix,remove. <action type="add" dev="ggregory" due-to="Gary Gregory, Piotr P. Karwasz">Introduce builders for all ArchiveInputStream implementations and deprecate some constructors.</action> <action type="add" dev="ggregory" due-to="Gary Gregory">TarFile now implements IOIterable<TarArchiveEntry>.</action> <action type="add" dev="ggregory" due-to="Gary Gregory">Add a builder for the TarFile class and deprecate some constructors.</action> + <action type="add" dev="pkarwasz" due-to="Piotr P. Karwasz">Introduce an ArchiveFile abstraction to unify the APIs of SevenZFile, TarFile, and ZipFile.</action> <!-- UPDATE --> <action type="update" dev="ggregory" due-to="Gary Gregory">Bump org.apache.commons:commons-parent from 85 to 88 #707.</action> <action type="update" dev="ppkarwasz" due-to="Raeps">Extract duplicate code in org.apache.commons.compress.harmony.pack200.IntList.</action> diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveFile.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveFile.java new file mode 100644 index 000000000..b6c236eab --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveFile.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.ZipFile; + +import org.apache.commons.io.function.IOStream; + +/** + * A file-based representation of an archive containing multiple {@link ArchiveEntry entries}. + * + * <p>This interface provides a higher-level abstraction over archive files, similar to + * {@link ZipFile}, but generalized for a variety of archive formats.</p> + * + * <p>Implementations are {@link Closeable} and should be closed once they are no longer + * needed in order to release any underlying system resources.</p> + * + * @param <T> the type of {@link ArchiveEntry} produced by this archive + * @since 1.29.0 + */ +public interface ArchiveFile<T extends ArchiveEntry> extends Closeable { + + /** + * Returns all entries contained in the archive as a list. + * + * <p>The order of entries is format-dependent but guaranteed to be consistent + * across multiple invocations on the same archive.</p> + * + * @return An immutable list of all entries in this archive. + * @throws IOException If an I/O error occurs while reading entries. + */ + default List<? extends T> entries() throws IOException { + try (IOStream<? extends T> stream = stream()) { + return stream.collect(Collectors.toList()); + } + } + + /** + * Returns a sequential stream of archive entries. + * + * <p>The order of entries is format-dependent but stable for a given archive.</p> + * <p>The returned stream <strong>must</strong> be closed after use to free + * associated resources.</p> + * + * @return A stream of entries in this archive. + * @throws IOException If an I/O error occurs while creating the stream. + */ + IOStream<? extends T> stream() throws IOException; + + /** + * Opens an input stream for the specified entry's contents. + * + * <p>The caller is responsible for closing the returned stream after use.</p> + * + * @param entry The archive entry to read. + * @return An input stream providing the contents of the given entry. + * @throws IOException If an I/O error occurs while opening the entry stream. + */ + InputStream getInputStream(T entry) throws IOException; +} + diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java index 28dfddb27..6815c242c 100644 --- a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -23,7 +23,6 @@ import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.Closeable; import java.io.DataInputStream; import java.io.EOFException; import java.io.File; @@ -52,11 +51,13 @@ import org.apache.commons.compress.MemoryLimitException; import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveFile; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.InputStreamStatistics; import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin; import org.apache.commons.io.build.AbstractStreamBuilder; +import org.apache.commons.io.function.IOStream; import org.apache.commons.io.input.BoundedInputStream; import org.apache.commons.io.input.ChecksumInputStream; import org.apache.commons.lang3.ArrayUtils; @@ -84,7 +85,7 @@ * @NotThreadSafe * @since 1.6 */ -public class SevenZFile implements Closeable { +public class SevenZFile implements ArchiveFile<SevenZArchiveEntry> { private static final class ArchiveStatistics { private int numberOfPackedStreams; @@ -1057,11 +1058,22 @@ public String getDefaultName() { * * @return a copy of meta-data of all archive entries. * @since 1.11 + * @deprecated Since 1.29.0, use {@link #entries()} or {@link #stream()} instead. */ + @Deprecated public Iterable<SevenZArchiveEntry> getEntries() { return new ArrayList<>(Arrays.asList(archive.files)); } + /** + * {@inheritDoc} + * @since 1.29.0 + */ + @Override + public IOStream<? extends SevenZArchiveEntry> stream() throws IOException { + return IOStream.of(archive.files); + } + /** * Gets an InputStream for reading the contents of the given entry. * <p> @@ -1073,6 +1085,7 @@ public Iterable<SevenZArchiveEntry> getEntries() { * @throws IOException if unable to create an input stream from the entry * @since 1.20 */ + @Override public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { int entryIndex = -1; for (int i = 0; i < archive.files.length; i++) { diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java index 9ebb6c1d6..6fefb29a1 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java @@ -18,7 +18,6 @@ */ package org.apache.commons.compress.archivers.tar; -import java.io.Closeable; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -34,6 +33,7 @@ import java.util.Map; import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveFile; import org.apache.commons.compress.archivers.zip.ZipEncoding; import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; @@ -43,6 +43,7 @@ import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; import org.apache.commons.io.function.IOIterable; import org.apache.commons.io.function.IOIterator; +import org.apache.commons.io.function.IOStream; import org.apache.commons.io.input.BoundedInputStream; /** @@ -50,7 +51,7 @@ * * @since 1.21 */ -public class TarFile implements Closeable, IOIterable<TarArchiveEntry> { +public class TarFile implements ArchiveFile<TarArchiveEntry>, IOIterable<TarArchiveEntry> { private final class BoundedTarEntryInputStream extends BoundedArchiveInputStream { @@ -448,11 +449,22 @@ private void consumeRemainderOfLastBlock() throws IOException { * Gets all TAR Archive Entries from the TarFile. * * @return All entries from the tar file. + * @deprecated Since 1.29.0, use {@link #entries()} or {@link #stream()} instead. */ + @Deprecated public List<TarArchiveEntry> getEntries() { return new ArrayList<>(entries); } + /** + * {@inheritDoc} + * @since 1.29.0 + */ + @Override + public IOStream<? extends TarArchiveEntry> stream() throws IOException { + return IOStream.of(entries); + } + /** * Gets the input stream for the provided Tar Archive Entry. * @@ -460,6 +472,7 @@ public List<TarArchiveEntry> getEntries() { * @return Input stream of the provided entry. * @throws IOException Corrupted TAR archive. Can't read entry. */ + @Override public InputStream getInputStream(final TarArchiveEntry entry) throws IOException { try { return new BoundedTarEntryInputStream(entry, archive); diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index 3e28361c3..051c52b89 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -20,7 +20,6 @@ import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; -import java.io.Closeable; import java.io.EOFException; import java.io.File; import java.io.IOException; @@ -52,6 +51,7 @@ import java.util.zip.ZipException; import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.commons.compress.archivers.ArchiveFile; import org.apache.commons.compress.archivers.EntryStreamOffsets; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; @@ -92,7 +92,7 @@ * <li>close is allowed to throw IOException.</li> * </ul> */ -public class ZipFile implements Closeable { +public class ZipFile implements ArchiveFile<ZipArchiveEntry> { /** * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs @@ -1154,7 +1154,9 @@ public String getEncoding() { * </p> * * @return all entries as {@link ZipArchiveEntry} instances + * @deprecated Since 1.29.0, use {@link #entries()} or {@link #stream()} instead. */ + @Deprecated public Enumeration<ZipArchiveEntry> getEntries() { return Collections.enumeration(entries); } @@ -1227,6 +1229,7 @@ public long getFirstLocalFileHeaderOffset() { * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}. * @throws IOException if unable to create an input stream from the zipEntry. */ + @Override public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException { if (!(entry instanceof Entry)) { return null; @@ -1757,6 +1760,7 @@ private boolean startsWithLocalFileHeader() throws IOException { * @throws IllegalStateException if the ZIP file has been closed. * @since 1.28.0 */ + @Override public IOStream<? extends ZipArchiveEntry> stream() { return IOStream.adapt(entries.stream()); } diff --git a/src/test/java/org/apache/commons/compress/archivers/AbstractArchiveFileTest.java b/src/test/java/org/apache/commons/compress/archivers/AbstractArchiveFileTest.java new file mode 100644 index 000000000..cb2d0268c --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/AbstractArchiveFileTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.InputStream; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.Date; +import java.util.List; + +import org.apache.commons.compress.AbstractTest; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.function.IOStream; +import org.junit.jupiter.api.Test; + +/** + * Abstract base class for tests of {@link ArchiveFile} implementations. + * + * @param <T> The type of {@link ArchiveEntry} produced. + */ +public abstract class AbstractArchiveFileTest<T extends ArchiveEntry> extends AbstractTest { + + private static ArchiveEntry newEntryUtc(String name, long size, LocalDateTime lastModified) { + return newEntry(name, size, lastModified.toInstant(ZoneOffset.UTC)); + } + + private static ArchiveEntry newEntry(String name, long size, Instant lastModified) { + return new ArchiveEntry() { + + @Override + public String getName() { + return name; + } + + @Override + public long getSize() { + return size; + } + + @Override + public boolean isDirectory() { + return false; + } + + @Override + public Date getLastModifiedDate() { + return Date.from(lastModified); + } + }; + } + + /** + * Returns an {@link ArchiveFile} to be tested. + * + * @return The archive file to be tested. + */ + protected abstract ArchiveFile<T> getArchiveFile() throws Exception; + + /** + * Returns the expected entries in the test archive. + * + * @return The expected entries. + */ + private List<? extends ArchiveEntry> getExpectedEntries() { + return Arrays.asList( + newEntryUtc("test1.xml", 610, LocalDateTime.of(2007, 11, 14, 10, 19, 2)), + newEntryUtc("test2.xml", 82, LocalDateTime.of(2007, 11, 14, 10, 19, 2))); + } + + /** + * Tests that the entries returned by {@link ArchiveFile#entries()} match the expected entries. + */ + @Test + void testEntries() throws Exception { + try (ArchiveFile<T> archiveFile = getArchiveFile()) { + final List<? extends T> entries = archiveFile.entries(); + final List<? extends ArchiveEntry> expectedEntries = getExpectedEntries(); + assertEquals(expectedEntries.size(), entries.size(), "Number of entries"); + for (int i = 0; i < expectedEntries.size(); i++) { + final ArchiveEntry expected = expectedEntries.get(i); + final ArchiveEntry actual = entries.get(i); + assertEquals(expected.getName(), actual.getName(), "Entry name at index " + i); + assertEquals(expected.getSize(), actual.getSize(), "Size of entry " + expected.getName()); + assertEquals( + expected.getLastModifiedDate(), + actual.getLastModifiedDate(), + "Last modified date of entry " + expected.getName()); + } + } + } + + /** + * Tests that the input streams returned by {@link ArchiveFile#getInputStream(ArchiveEntry)} match the expected + * entries. + */ + @Test + void testGetInputStream() throws Exception { + try (ArchiveFile<T> archiveFile = getArchiveFile()) { + final List<? extends ArchiveEntry> expectedEntries = getExpectedEntries(); + for (final ArchiveEntry expected : expectedEntries) { + final T actual = getMatchingEntry(archiveFile, expected.getName()); + assertNotNull(actual, "Entry " + expected.getName() + " not found"); + try (InputStream inputStream = archiveFile.getInputStream(actual)) { + assertNotNull(inputStream, "Input stream for entry " + expected.getName()); + final byte[] content = IOUtils.toByteArray(inputStream); + assertEquals(expected.getSize(), content.length, "Size of entry " + expected.getName()); + } + } + } + } + + private T getMatchingEntry(ArchiveFile<? extends T> archiveFile, String name) throws Exception { + try (IOStream<? extends T> stream = archiveFile.stream()) { + return stream.filter(e -> e.getName().equals(name)).findFirst().orElse(null); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java index 446a2ba64..d8534d451 100644 --- a/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/sevenz/SevenZFileTest.java @@ -51,9 +51,9 @@ import javax.crypto.Cipher; -import org.apache.commons.compress.AbstractTest; import org.apache.commons.compress.MemoryLimitException; import org.apache.commons.compress.PasswordRequiredException; +import org.apache.commons.compress.archivers.AbstractArchiveFileTest; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel; import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; @@ -62,7 +62,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -class SevenZFileTest extends AbstractTest { +class SevenZFileTest extends AbstractArchiveFileTest<SevenZArchiveEntry> { private static final String TEST2_CONTENT = "<?xml version = '1.0'?>\r\n<!DOCTYPE connections>\r\n<meinxml>\r\n\t<leer />\r\n</meinxml>\n"; private static boolean isStrongCryptoAvailable() throws NoSuchAlgorithmException { @@ -1050,4 +1050,9 @@ void testSignatureCheck() { final byte[] data3 = { '7', 'z', (byte) 0xBC, (byte) 0xAF, 0x27, 0x1D }; assertFalse(SevenZFile.matches(data3, data3.length)); } + + @Override + protected SevenZFile getArchiveFile() throws Exception { + return SevenZFile.builder().setFile(getFile("bla.7z")).get(); + } } diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java index 5b8b9a18f..364eafa2b 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarFileTest.java @@ -40,15 +40,14 @@ import java.util.List; import java.util.zip.GZIPInputStream; -import org.apache.commons.compress.AbstractTest; +import org.apache.commons.compress.archivers.AbstractArchiveFileTest; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.TimeZones; import org.junit.jupiter.api.Test; -import shaded.org.apache.commons.lang3.StringUtils; - -class TarFileTest extends AbstractTest { +class TarFileTest extends AbstractArchiveFileTest<TarArchiveEntry> { private void datePriorToEpoch(final String archive) throws Exception { try (TarFile tarFile = new TarFile(getPath(archive))) { @@ -379,4 +378,9 @@ void testWorkaroundForBrokenTimeHeader() throws IOException { assertTrue(entry.isCheckSumOK()); } } + + @Override + protected TarFile getArchiveFile() throws Exception { + return new TarFile(getPath("bla.tar")); + } } diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java index ba97587aa..62a76a40c 100644 --- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipFileTest.java @@ -56,7 +56,7 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipException; -import org.apache.commons.compress.AbstractTest; +import org.apache.commons.compress.archivers.AbstractArchiveFileTest; import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.compress.utils.SeekableInMemoryByteChannel; @@ -72,7 +72,7 @@ import io.airlift.compress.zstd.ZstdInputStream; -class ZipFileTest extends AbstractTest { +class ZipFileTest extends AbstractArchiveFileTest<ZipArchiveEntry> { /** * This Class simulates the case where the Zip File uses the aircompressors {@link ZstdInputStream} @@ -1097,4 +1097,8 @@ void testZstdInputStreamErrorCloseWhenGc() throws Exception { } } + @Override + protected ZipFile getArchiveFile() throws Exception { + return ZipFile.builder().setFile(getFile("bla.zip")).get(); + } }