This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit f196253cc9794ac682679854e68c3184ab938618
Author: Stefan Bodewig <bode...@apache.org>
AuthorDate: Sat May 1 15:27:50 2021 +0200

    COMPRESS-575 extract and harden sorting of sparse structs
---
 .../compress/archivers/tar/TarArchiveEntry.java    | 35 ++++++++++++++++++++
 .../archivers/tar/TarArchiveInputStream.java       | 27 ++++-----------
 .../archivers/tar/TarArchiveSparseEntry.java       |  4 +++
 .../commons/compress/archivers/tar/TarFile.java    | 33 +++++--------------
 .../commons/compress/archivers/tar/TarUtils.java   |  5 +--
 .../compress/archivers/tar/SparseFilesTest.java    | 38 +++++++++++++++++++---
 .../archivers/tar/TarArchiveEntryTest.java         | 28 ++++++++++++++++
 7 files changed, 118 insertions(+), 52 deletions(-)

diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
index fdeb0de..de9f26f 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
@@ -30,6 +30,7 @@ import java.nio.file.attribute.FileTime;
 import java.nio.file.attribute.PosixFileAttributes;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -38,6 +39,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
 
 import org.apache.commons.compress.archivers.ArchiveEntry;
 import org.apache.commons.compress.archivers.EntryStreamOffsets;
@@ -930,6 +932,39 @@ public class TarArchiveEntry implements ArchiveEntry, 
TarConstants, EntryStreamO
     }
 
     /**
+     * Get this entry's sparse headers ordered by offset with all empty sparse 
sections at the start filtered out.
+     *
+     * @return immutable list of this entry's sparse headers, never null
+     * @since 1.21
+     * @throws IOException if the list of sparse headers contains blocks that 
overlap
+     */
+    public List<TarArchiveStructSparse> getOrderedSparseHeaders() throws 
IOException {
+        if (sparseHeaders == null || sparseHeaders.isEmpty()) {
+            return Collections.emptyList();
+        }
+        final List<TarArchiveStructSparse> orderedAndFiltered = 
sparseHeaders.stream()
+            .filter(s -> s.getOffset() > 0 || s.getNumbytes() > 0)
+            
.sorted(Comparator.comparingLong(TarArchiveStructSparse::getOffset))
+            .collect(Collectors.toList());
+
+        for (int i = 0; i < orderedAndFiltered.size(); i++) {
+            final TarArchiveStructSparse str = orderedAndFiltered.get(i);
+            if (i + 1 < orderedAndFiltered.size()) {
+                if (str.getOffset() + str.getNumbytes() > 
orderedAndFiltered.get(i + 1).getOffset()) {
+                    throw new IOException("Corrupted TAR archive. Sparse 
blocks for "
+                        + getName() + " overlap each other.");
+                }
+            }
+            if (str.getOffset() + str.getNumbytes() < 0) {
+                // integer overflow?
+                throw new IOException("Unreadable TAR archive. Offset and 
numbytes for sparse block in "
+                    + getName() + " too large.");
+            }
+        }
+        return orderedAndFiltered;
+    }
+
+    /**
      * Get if this entry is a sparse file with 1.X PAX Format or not
      *
      * @return True if this entry is a sparse file with 1.X PAX Format
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
index 4aaeba9..902af1f 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
@@ -28,7 +28,6 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -913,32 +912,21 @@ public class TarArchiveInputStream extends 
ArchiveInputStream {
         currentSparseInputStreamIndex = -1;
         sparseInputStreams = new ArrayList<>();
 
-        final List<TarArchiveStructSparse> sparseHeaders = 
currEntry.getSparseHeaders();
-        // sort the sparse headers in case they are written in wrong order
-        if (sparseHeaders != null && sparseHeaders.size() > 1) {
-            final Comparator<TarArchiveStructSparse> sparseHeaderComparator = 
(p, q) -> {
-                final Long pOffset = p.getOffset();
-                final Long qOffset = q.getOffset();
-                return pOffset.compareTo(qOffset);
-            };
-            sparseHeaders.sort(sparseHeaderComparator);
-        }
+        final List<TarArchiveStructSparse> sparseHeaders = 
currEntry.getOrderedSparseHeaders();
 
-        if (sparseHeaders != null) {
             // Stream doesn't need to be closed at all as it doesn't use any 
resources
             final InputStream zeroInputStream = new 
TarArchiveSparseZeroInputStream(); //NOSONAR
+            // logical offset into the extracted entry
             long offset = 0;
             for (final TarArchiveStructSparse sparseHeader : sparseHeaders) {
-                if (sparseHeader.getOffset() == 0 && 
sparseHeader.getNumbytes() == 0) {
-                    break;
-                }
-
-                if ((sparseHeader.getOffset() - offset) < 0) {
+                final long zeroBlockSize = sparseHeader.getOffset() - offset;
+                if (zeroBlockSize < 0) {
+                    // sparse header says to move backwards inside of the 
extracted entry
                     throw new IOException("Corrupted struct sparse detected");
                 }
 
-                // only store the input streams with non-zero size
-                if ((sparseHeader.getOffset() - offset) > 0) {
+                // only store the zero block if it is not empty
+                if (zeroBlockSize > 0) {
                     sparseInputStreams.add(new 
BoundedInputStream(zeroInputStream, sparseHeader.getOffset() - offset));
                 }
 
@@ -949,7 +937,6 @@ public class TarArchiveInputStream extends 
ArchiveInputStream {
 
                 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
             }
-        }
 
         if (!sparseInputStreams.isEmpty()) {
             currentSparseInputStreamIndex = 0;
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
index 6973194..2d5a124 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveSparseEntry.java
@@ -40,6 +40,10 @@ import java.util.List;
  * char numbytes[12]; // offset 12
  * };
  * </pre>
+ *
+ * <p>Each such struct describes a block of data that has actually been 
written to the archive. The offset describes
+ * where in the extracted file the data is supposed to start and the numbytes 
provides the length of the block. When
+ * extracting the entry the gaps between the sparse structs are equivalent to 
areas filled with zero bytes.</p>
  */
 
 public class TarArchiveSparseEntry implements TarConstants {
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
index 9b82025..6bcd701 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java
@@ -27,8 +27,6 @@ import java.nio.channels.SeekableByteChannel;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -337,36 +335,24 @@ public class TarFile implements Closeable {
     private void buildSparseInputStreams() throws IOException {
         final List<InputStream> streams = new ArrayList<>();
 
-        final List<TarArchiveStructSparse> sparseHeaders = 
currEntry.getSparseHeaders();
-        // sort the sparse headers in case they are written in wrong order
-        if (sparseHeaders != null && sparseHeaders.size() > 1) {
-            final Comparator<TarArchiveStructSparse> sparseHeaderComparator = 
(p, q) -> {
-                Long pOffset = p.getOffset();
-                Long qOffset = q.getOffset();
-                return pOffset.compareTo(qOffset);
-            };
-            Collections.sort(sparseHeaders, sparseHeaderComparator);
-        }
+        final List<TarArchiveStructSparse> sparseHeaders = 
currEntry.getOrderedSparseHeaders();
 
-        if (sparseHeaders != null) {
             // Stream doesn't need to be closed at all as it doesn't use any 
resources
             final InputStream zeroInputStream = new 
TarArchiveSparseZeroInputStream(); //NOSONAR
+            // logical offset into the extracted entry
             long offset = 0;
             long numberOfZeroBytesInSparseEntry = 0;
             for (TarArchiveStructSparse sparseHeader : sparseHeaders) {
-                if (sparseHeader.getOffset() == 0 && 
sparseHeader.getNumbytes() == 0) {
-                    break;
-                }
-
-                if ((sparseHeader.getOffset() - offset) < 0) {
+                final long zeroBlockSize = sparseHeader.getOffset() - offset;
+                if (zeroBlockSize < 0) {
+                    // sparse header says to move backwards inside of the 
extracted entry
                     throw new IOException("Corrupted struct sparse detected");
                 }
 
-                // only store the input streams with non-zero size
-                if ((sparseHeader.getOffset() - offset) > 0) {
-                    final long sizeOfZeroByteStream = sparseHeader.getOffset() 
- offset;
-                    streams.add(new BoundedInputStream(zeroInputStream, 
sizeOfZeroByteStream));
-                    numberOfZeroBytesInSparseEntry += sizeOfZeroByteStream;
+                // only store the zero block if it is not empty
+                if (zeroBlockSize > 0) {
+                    streams.add(new BoundedInputStream(zeroInputStream, 
zeroBlockSize));
+                    numberOfZeroBytesInSparseEntry += zeroBlockSize;
                 }
 
                 // only store the input streams with non-zero size
@@ -378,7 +364,6 @@ public class TarFile implements Closeable {
 
                 offset = sparseHeader.getOffset() + sparseHeader.getNumbytes();
             }
-        }
 
         sparseInputStreams.put(currEntry.getName(), streams);
     }
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java 
b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
index 3ba7a1b..e010880 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
@@ -344,10 +344,7 @@ public class TarUtils {
                 if (sparseHeader.getNumbytes() < 0) {
                     throw new IOException("Corrupted TAR archive, sparse entry 
with negative numbytes");
                 }
-                // some sparse headers are empty, we need to skip these sparse 
headers
-                if (sparseHeader.getOffset() > 0 || sparseHeader.getNumbytes() 
> 0) {
-                    sparseHeaders.add(sparseHeader);
-                }
+                sparseHeaders.add(sparseHeader);
             } catch (IllegalArgumentException ex) {
                 // thrown internally by parseOctalOrBinary
                 throw new IOException("Corrupted TAR archive, sparse entry is 
invalid", ex);
diff --git 
a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java 
b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
index d00f9e1..9faa7d9 100644
--- 
a/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
+++ 
b/src/test/java/org/apache/commons/compress/archivers/tar/SparseFilesTest.java
@@ -56,7 +56,7 @@ public class SparseFilesTest extends AbstractTestCase {
             assertTrue(tin.canReadEntryData(ae));
 
             final List<TarArchiveStructSparse> sparseHeaders = 
ae.getSparseHeaders();
-            assertEquals(3, sparseHeaders.size());
+            assertEquals(4, sparseHeaders.size());
 
             assertEquals(0, sparseHeaders.get(0).getOffset());
             assertEquals(2048, sparseHeaders.get(0).getNumbytes());
@@ -66,6 +66,21 @@ public class SparseFilesTest extends AbstractTestCase {
 
             assertEquals(3101184L, sparseHeaders.get(2).getOffset());
             assertEquals(0, sparseHeaders.get(2).getNumbytes());
+
+            assertEquals(0, sparseHeaders.get(3).getOffset());
+            assertEquals(0, sparseHeaders.get(3).getNumbytes());
+
+            final List<TarArchiveStructSparse> sparseOrderedHeaders = 
ae.getOrderedSparseHeaders();
+            assertEquals(3, sparseOrderedHeaders.size());
+
+            assertEquals(0, sparseOrderedHeaders.get(0).getOffset());
+            assertEquals(2048, sparseOrderedHeaders.get(0).getNumbytes());
+
+            assertEquals(1050624L, sparseOrderedHeaders.get(1).getOffset());
+            assertEquals(2560, sparseOrderedHeaders.get(1).getNumbytes());
+
+            assertEquals(3101184L, sparseOrderedHeaders.get(2).getOffset());
+            assertEquals(0, sparseOrderedHeaders.get(2).getNumbytes());
         }
     }
 
@@ -80,7 +95,7 @@ public class SparseFilesTest extends AbstractTestCase {
             assertFalse(ae.isPaxGNUSparse());
 
             List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders();
-            assertEquals(3, sparseHeaders.size());
+            assertEquals(4, sparseHeaders.size());
 
             assertEquals(0, sparseHeaders.get(0).getOffset());
             assertEquals(2048, sparseHeaders.get(0).getNumbytes());
@@ -90,6 +105,21 @@ public class SparseFilesTest extends AbstractTestCase {
 
             assertEquals(3101184L, sparseHeaders.get(2).getOffset());
             assertEquals(0, sparseHeaders.get(2).getNumbytes());
+
+            assertEquals(0, sparseHeaders.get(3).getOffset());
+            assertEquals(0, sparseHeaders.get(3).getNumbytes());
+
+            final List<TarArchiveStructSparse> sparseOrderedHeaders = 
ae.getOrderedSparseHeaders();
+            assertEquals(3, sparseOrderedHeaders.size());
+
+            assertEquals(0, sparseOrderedHeaders.get(0).getOffset());
+            assertEquals(2048, sparseOrderedHeaders.get(0).getNumbytes());
+
+            assertEquals(1050624L, sparseOrderedHeaders.get(1).getOffset());
+            assertEquals(2560, sparseOrderedHeaders.get(1).getNumbytes());
+
+            assertEquals(3101184L, sparseOrderedHeaders.get(2).getOffset());
+            assertEquals(0, sparseOrderedHeaders.get(2).getNumbytes());
         }
     }
 
@@ -260,7 +290,7 @@ public class SparseFilesTest extends AbstractTestCase {
             assertArrayEquals(IOUtils.toByteArray(tin),
                 IOUtils.toByteArray(sparseFileInputStream));
 
-            final List<TarArchiveStructSparse> sparseHeaders = 
ae.getSparseHeaders();
+            final List<TarArchiveStructSparse> sparseHeaders = 
ae.getOrderedSparseHeaders();
             assertEquals(7, sparseHeaders.size());
 
             assertEquals(0, sparseHeaders.get(0).getOffset());
@@ -299,7 +329,7 @@ public class SparseFilesTest extends AbstractTestCase {
                 assertArrayEquals(IOUtils.toByteArray(tarInput), 
IOUtils.toByteArray(sparseFileInputStream));
             }
 
-            List<TarArchiveStructSparse> sparseHeaders = ae.getSparseHeaders();
+            List<TarArchiveStructSparse> sparseHeaders = 
ae.getOrderedSparseHeaders();
             assertEquals(7, sparseHeaders.size());
 
             assertEquals(0, sparseHeaders.get(0).getOffset());
diff --git 
a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java
 
b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java
index 988a469..47195ac 100644
--- 
a/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java
+++ 
b/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveEntryTest.java
@@ -34,6 +34,8 @@ import java.io.File;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.List;
 import java.util.Locale;
 import org.apache.commons.compress.AbstractTestCase;
 import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
@@ -271,6 +273,32 @@ public class TarArchiveEntryTest implements TarConstants {
         new TarArchiveEntry("test").setDataOffset(-1);
     }
 
+    @Test
+    public void getOrderedSparseHeadersSortsAndFiltersSparseStructs() throws 
Exception {
+        final TarArchiveEntry te = new TarArchiveEntry("test");
+        te.setSparseHeaders(Arrays.asList(new TarArchiveStructSparse(10, 2), 
new TarArchiveStructSparse(20, 0),
+            new TarArchiveStructSparse(15, 1), new TarArchiveStructSparse(0, 
0)));
+        final List<TarArchiveStructSparse> strs = te.getOrderedSparseHeaders();
+        assertEquals(3, strs.size());
+        assertEquals(10, strs.get(0).getOffset());
+        assertEquals(15, strs.get(1).getOffset());
+        assertEquals(20, strs.get(2).getOffset());
+    }
+
+    @Test(expected = IOException.class)
+    public void getOrderedSparseHeadersRejectsOverlappingStructs() throws 
Exception {
+        final TarArchiveEntry te = new TarArchiveEntry("test");
+        te.setSparseHeaders(Arrays.asList(new TarArchiveStructSparse(10, 5), 
new TarArchiveStructSparse(12, 1)));
+        te.getOrderedSparseHeaders();
+    }
+
+    @Test(expected = IOException.class)
+    public void getOrderedSparseHeadersRejectsStructsWithReallyBigNumbers() 
throws Exception {
+        final TarArchiveEntry te = new TarArchiveEntry("test");
+        te.setSparseHeaders(Arrays.asList(new 
TarArchiveStructSparse(Long.MAX_VALUE, 2)));
+        te.getOrderedSparseHeaders();
+    }
+
     private void assertGnuMagic(final TarArchiveEntry t) {
         assertEquals(MAGIC_GNU + VERSION_GNU_SPACE, readMagic(t));
     }

Reply via email to