This is an automated email from the ASF dual-hosted git repository. bodewig pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-compress.git
commit f196253cc9794ac682679854e68c3184ab938618 Author: Stefan Bodewig <bode...@apache.org> AuthorDate: Sat May 1 15:27:50 2021 +0200 COMPRESS-575 extract and harden sorting of sparse structs --- .../compress/archivers/tar/TarArchiveEntry.java | 35 ++++++++++++++++++++ .../archivers/tar/TarArchiveInputStream.java | 27 ++++----------- .../archivers/tar/TarArchiveSparseEntry.java | 4 +++ .../commons/compress/archivers/tar/TarFile.java | 33 +++++-------------- .../commons/compress/archivers/tar/TarUtils.java | 5 +-- .../compress/archivers/tar/SparseFilesTest.java | 38 +++++++++++++++++++--- .../archivers/tar/TarArchiveEntryTest.java | 28 ++++++++++++++++ 7 files changed, 118 insertions(+), 52 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java index fdeb0de..de9f26f 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java @@ -30,6 +30,7 @@ import java.nio.file.attribute.FileTime; import java.nio.file.attribute.PosixFileAttributes; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.Date; import java.util.HashMap; import java.util.Iterator; @@ -38,6 +39,7 @@ import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import org.apache.commons.compress.archivers.ArchiveEntry; import org.apache.commons.compress.archivers.EntryStreamOffsets; @@ -930,6 +932,39 @@ public class TarArchiveEntry implements ArchiveEntry, TarConstants, EntryStreamO } /** + * Get this entry's sparse headers ordered by offset with all empty sparse sections at the start filtered out. + * + * @return immutable list of this entry's sparse headers, never null + * @since 1.21 + * @throws IOException if the list of sparse headers contains blocks that overlap + */ + public List<TarArchiveStructSparse> getOrderedSparseHeaders() throws IOException { + if (sparseHeaders == null || sparseHeaders.isEmpty()) { + return Collections.emptyList(); + } + final List<TarArchiveStructSparse> orderedAndFiltered = sparseHeaders.stream() + .filter(s -> s.getOffset() > 0 || s.getNumbytes() > 0) + .sorted(Comparator.comparingLong(TarArchiveStructSparse::getOffset)) + .collect(Collectors.toList()); + + for (int i = 0; i < orderedAndFiltered.size(); i++) { + final TarArchiveStructSparse str = orderedAndFiltered.get(i); + if (i + 1 < orderedAndFiltered.size()) { + if (str.getOffset() + str.getNumbytes() > orderedAndFiltered.get(i + 1).getOffset()) { + throw new IOException("Corrupted TAR archive. Sparse blocks for " + + getName() + " overlap each other."); + } + } + if (str.getOffset() + str.getNumbytes() < 0) { + // integer overflow? + throw new IOException("Unreadable TAR archive. Offset and numbytes for sparse block in " + + getName() + " too large."); + } + } + return orderedAndFiltered; + } + + /** * Get if this entry is a sparse file with 1.X PAX Format or not * * @return True if this entry is a sparse file with 1.X PAX Format diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java index 4aaeba9..902af1f 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java @@ -28,7 +28,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -913,32 +912,21 @@ public class TarArchiveInputStream extends ArchiveInputStream { currentSparseInputStreamIndex = -1; sparseInputStreams = new ArrayList<>(); - final List<TarArchiveStructSparse> sparseHeaders = currEntry.getSparseHeaders(); - // sort the sparse headers in case they are written in wrong order - if (sparseHeaders != null && sparseHeaders.size() > 1) { - final Comparator<TarArchiveStructSparse> sparseHeaderComparator = (p, q) -> { - final Long pOffset = p.getOffset(); - final Long qOffset = q.getOffset(); - return pOffset.compareTo(qOffset); - }; - sparseHeaders.sort(sparseHeaderComparator); - } + final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders(); - if (sparseHeaders != null) { // Stream doesn't need to be closed at all as it doesn't use any resources final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR + // logical offset into the extracted entry long offset = 0; for (final TarArchiveStructSparse sparseHeader : sparseHeaders) { - if (sparseHeader.getOffset() == 0 && sparseHeader.getNumbytes() == 0) { - break; - } - - if ((sparseHeader.getOffset() - offset) < 0) { + final long zeroBlockSize = sparseHeader.getOffset() - offset; + if (zeroBlockSize < 0) { + // sparse header says to move backwards inside of the extracted entry throw new IOException("Corrupted struct sparse detected"); } - // only store the input streams with non-zero size - if ((sparseHeader.getOffset() - offset) > 0) { + // only store the zero block if it is not empty + if (zeroBlockSize > 0) { sparseInputStreams.add(new BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset)); } @@ -949,7 +937,6 @@ public class TarArchiveInputStream extends ArchiveInputStream { offset = sparseHeader.getOffset() + sparseHeader.getNumbytes(); } - } if (!sparseInputStreams.isEmpty()) { currentSparseInputStreamIndex = 0; diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java index 6973194..2d5a124 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java @@ -40,6 +40,10 @@ import java.util.List; * char numbytes[12]; // offset 12 * }; * </pre> + * + * <p>Each such struct describes a block of data that has actually been written to the archive. The offset describes + * where in the extracted file the data is supposed to start and the numbytes provides the length of the block. When + * extracting the entry the gaps between the sparse structs are equivalent to areas filled with zero bytes.</p> */ public class TarArchiveSparseEntry implements TarConstants { diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java index 9b82025..6bcd701 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java @@ -27,8 +27,6 @@ import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -337,36 +335,24 @@ public class TarFile implements Closeable { private void buildSparseInputStreams() throws IOException { final List<InputStream> streams = new ArrayList<>(); - final List<TarArchiveStructSparse> sparseHeaders = currEntry.getSparseHeaders(); - // sort the sparse headers in case they are written in wrong order - if (sparseHeaders != null && sparseHeaders.size() > 1) { - final Comparator<TarArchiveStructSparse> sparseHeaderComparator = (p, q) -> { - Long pOffset = p.getOffset(); - Long qOffset = q.getOffset(); - return pOffset.compareTo(qOffset); - }; - Collections.sort(sparseHeaders, sparseHeaderComparator); - } + final List<TarArchiveStructSparse> sparseHeaders = currEntry.getOrderedSparseHeaders(); - if (sparseHeaders != null) { // Stream doesn't need to be closed at all as it doesn't use any resources final InputStream zeroInputStream = new TarArchiveSparseZeroInputStream(); //NOSONAR + // logical offset into the extracted entry long offset = 0; long numberOfZeroBytesInSparseEntry = 0; for (TarArchiveStructSparse sparseHeader : sparseHeaders) { - if (sparseHeader.getOffset() == 0 && sparseHeader.getNumbytes() == 0) { - break; - } - - if ((sparseHeader.getOffset() - offset) < 0) { + final long zeroBlockSize = sparseHeader.getOffset() - offset; + if (zeroBlockSize < 0) { + // sparse header says to move backwards inside of the extracted entry throw new IOException("Corrupted struct sparse detected"); } - // only store the input streams with non-zero size - if ((sparseHeader.getOffset() - offset) > 0) { - final long sizeOfZeroByteStream = sparseHeader.getOffset() - offset; - streams.add(new BoundedInputStream(zeroInputStream, sizeOfZeroByteStream)); - numberOfZeroBytesInSparseEntry += sizeOfZeroByteStream; + // only store the zero block if it is not empty + if (zeroBlockSize > 0) { + streams.add(new BoundedInputStream(zeroInputStream, zeroBlockSize)); + numberOfZeroBytesInSparseEntry += zeroBlockSize; } // only store the input streams with non-zero size @@ -378,7 +364,6 @@ public class TarFile implements Closeable { offset = sparseHeader.getOffset() + sparseHeader.getNumbytes(); } - } sparseInputStreams.put(currEntry.getName(), streams); } diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java index 3ba7a1b..e010880 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java @@ -344,10 +344,7 @@ public class TarUtils { if (sparseHeader.getNumbytes() < 0) { throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); } - // some sparse headers are empty, we need to skip these sparse headers - if (sparseHeader.getOffset() > 0 || sparseHeader.getNumbytes() > 0) { - sparseHeaders.add(sparseHeader); - } + sparseHeaders.add(sparseHeader); } catch (IllegalArgumentException ex) { // thrown internally by parseOctalOrBinary throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java index d00f9e1..9faa7d9 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java @@ -56,7 +56,7 @@ public class SparseFilesTest extends AbstractTestCase { assertTrue(tin.canReadEntryData(ae)); final List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders(); - assertEquals(3, sparseHeaders.size()); + assertEquals(4, sparseHeaders.size()); assertEquals(0, sparseHeaders.get(0).getOffset()); assertEquals(2048, sparseHeaders.get(0).getNumbytes()); @@ -66,6 +66,21 @@ public class SparseFilesTest extends AbstractTestCase { assertEquals(3101184L, sparseHeaders.get(2).getOffset()); assertEquals(0, sparseHeaders.get(2).getNumbytes()); + + assertEquals(0, sparseHeaders.get(3).getOffset()); + assertEquals(0, sparseHeaders.get(3).getNumbytes()); + + final List<TarArchiveStructSparse> sparseOrderedHeaders = ae.getOrderedSparseHeaders(); + assertEquals(3, sparseOrderedHeaders.size()); + + assertEquals(0, sparseOrderedHeaders.get(0).getOffset()); + assertEquals(2048, sparseOrderedHeaders.get(0).getNumbytes()); + + assertEquals(1050624L, sparseOrderedHeaders.get(1).getOffset()); + assertEquals(2560, sparseOrderedHeaders.get(1).getNumbytes()); + + assertEquals(3101184L, sparseOrderedHeaders.get(2).getOffset()); + assertEquals(0, sparseOrderedHeaders.get(2).getNumbytes()); } } @@ -80,7 +95,7 @@ public class SparseFilesTest extends AbstractTestCase { assertFalse(ae.isPaxGNUSparse()); List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders(); - assertEquals(3, sparseHeaders.size()); + assertEquals(4, sparseHeaders.size()); assertEquals(0, sparseHeaders.get(0).getOffset()); assertEquals(2048, sparseHeaders.get(0).getNumbytes()); @@ -90,6 +105,21 @@ public class SparseFilesTest extends AbstractTestCase { assertEquals(3101184L, sparseHeaders.get(2).getOffset()); assertEquals(0, sparseHeaders.get(2).getNumbytes()); + + assertEquals(0, sparseHeaders.get(3).getOffset()); + assertEquals(0, sparseHeaders.get(3).getNumbytes()); + + final List<TarArchiveStructSparse> sparseOrderedHeaders = ae.getOrderedSparseHeaders(); + assertEquals(3, sparseOrderedHeaders.size()); + + assertEquals(0, sparseOrderedHeaders.get(0).getOffset()); + assertEquals(2048, sparseOrderedHeaders.get(0).getNumbytes()); + + assertEquals(1050624L, sparseOrderedHeaders.get(1).getOffset()); + assertEquals(2560, sparseOrderedHeaders.get(1).getNumbytes()); + + assertEquals(3101184L, sparseOrderedHeaders.get(2).getOffset()); + assertEquals(0, sparseOrderedHeaders.get(2).getNumbytes()); } } @@ -260,7 +290,7 @@ public class SparseFilesTest extends AbstractTestCase { assertArrayEquals(IOUtils.toByteArray(tin), IOUtils.toByteArray(sparseFileInputStream)); - final List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders(); + final List<TarArchiveStructSparse> sparseHeaders = ae.getOrderedSparseHeaders(); assertEquals(7, sparseHeaders.size()); assertEquals(0, sparseHeaders.get(0).getOffset()); @@ -299,7 +329,7 @@ public class SparseFilesTest extends AbstractTestCase { assertArrayEquals(IOUtils.toByteArray(tarInput), IOUtils.toByteArray(sparseFileInputStream)); } - List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders(); + List<TarArchiveStructSparse> sparseHeaders = ae.getOrderedSparseHeaders(); assertEquals(7, sparseHeaders.size()); assertEquals(0, sparseHeaders.get(0).getOffset()); diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java index 988a469..47195ac 100644 --- a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java @@ -34,6 +34,8 @@ import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; import java.util.Locale; import org.apache.commons.compress.AbstractTestCase; import org.apache.commons.compress.archivers.zip.ZipEncodingHelper; @@ -271,6 +273,32 @@ public class TarArchiveEntryTest implements TarConstants { new TarArchiveEntry("test").setDataOffset(-1); } + @Test + public void getOrderedSparseHeadersSortsAndFiltersSparseStructs() throws Exception { + final TarArchiveEntry te = new TarArchiveEntry("test"); + te.setSparseHeaders(Arrays.asList(new TarArchiveStructSparse(10, 2), new TarArchiveStructSparse(20, 0), + new TarArchiveStructSparse(15, 1), new TarArchiveStructSparse(0, 0))); + final List<TarArchiveStructSparse> strs = te.getOrderedSparseHeaders(); + assertEquals(3, strs.size()); + assertEquals(10, strs.get(0).getOffset()); + assertEquals(15, strs.get(1).getOffset()); + assertEquals(20, strs.get(2).getOffset()); + } + + @Test(expected = IOException.class) + public void getOrderedSparseHeadersRejectsOverlappingStructs() throws Exception { + final TarArchiveEntry te = new TarArchiveEntry("test"); + te.setSparseHeaders(Arrays.asList(new TarArchiveStructSparse(10, 5), new TarArchiveStructSparse(12, 1))); + te.getOrderedSparseHeaders(); + } + + @Test(expected = IOException.class) + public void getOrderedSparseHeadersRejectsStructsWithReallyBigNumbers() throws Exception { + final TarArchiveEntry te = new TarArchiveEntry("test"); + te.setSparseHeaders(Arrays.asList(new TarArchiveStructSparse(Long.MAX_VALUE, 2))); + te.getOrderedSparseHeaders(); + } + private void assertGnuMagic(final TarArchiveEntry t) { assertEquals(MAGIC_GNU + VERSION_GNU_SPACE, readMagic(t)); }