COMPRESS-394/395 use same "version needed to extract" in LFH and CDH

... and skip data descriptor when copying raw entries


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/d75b8985
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/d75b8985
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/d75b8985

Branch: refs/heads/master
Commit: d75b89856ef29bbec6784cd6aa714eafe0eeb312
Parents: 12edac7
Author: Stefan Bodewig <bode...@apache.org>
Authored: Mon May 22 18:11:06 2017 +0200
Committer: Stefan Bodewig <bode...@apache.org>
Committed: Mon May 22 18:11:06 2017 +0200

----------------------------------------------------------------------
 src/changes/changes.xml                         |   9 +
 .../archivers/zip/ZipArchiveOutputStream.java   |  74 ++++----
 .../archivers/zip/DataDescriptorTest.java       | 189 +++++++++++++++++++
 3 files changed, 239 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d75b8985/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 203a2fd..3a1b513 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -44,6 +44,15 @@ The <action> type attribute can be add,update,fix,remove.
   <body>
     <release version="1.15" date="not released, yet"
              description="Release 1.15">
+      <action issue="COMPRESS-394" type="fix" date="2017-05-22">
+        Make sure "version needed to extract" in local file header and
+        central directory of a ZIP archive agree with each other.
+        Also ensure the version is set to 2.0 if DEFLATE is used.
+      </action>
+      <action issue="COMPRESS-395" type="fix" date="2017-05-22">
+        Don't use a data descriptor in ZIP archives when copying a raw
+        entry that already knows its size and CRC information.
+      </action>
     </release>
     <release version="1.14" date="2017-05-14"
              description="Release 1.14">

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d75b8985/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
 
b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
index 077344d..63aeba7 100644
--- 
a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
+++ 
b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
@@ -202,9 +202,9 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
     private static final byte[] ONE = ZipLong.getBytes(1L);
 
     /**
-     * Holds the offsets of the LFH starts for each entry.
+     * Holds some book-keeping data for each entry.
      */
-    private final Map<ZipArchiveEntry, Long> offsets =
+    private final Map<ZipArchiveEntry, EntryMetaData> metaData =
         new HashMap<>();
 
     /**
@@ -473,7 +473,7 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
         cdLength = streamCompressor.getTotalBytesWritten() - cdOffset;
         writeZip64CentralDirectory();
         writeCentralDirectoryEnd();
-        offsets.clear();
+        metaData.clear();
         entries.clear();
         streamCompressor.close();
         finished = true;
@@ -539,7 +539,9 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
             rewriteSizesAndCrc(actuallyNeedsZip64);
         }
 
-        writeDataDescriptor(entry.entry);
+        if (!phased) {
+            writeDataDescriptor(entry.entry);
+        }
         entry = null;
     }
 
@@ -695,7 +697,7 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
                 // do some cleanup:
                 // * rewrite version needed to extract
                 channel.position(entry.localDataStart  - 5 * SHORT);
-                
writeOut(ZipShort.getBytes(versionNeededToExtractMethod(entry.entry.getMethod())));
+                
writeOut(ZipShort.getBytes(versionNeededToExtract(entry.entry.getMethod(), 
false, false)));
 
                 // * remove ZIP64 extra so it doesn't get written
                 //   to the central directory
@@ -1029,7 +1031,7 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
 
         final long localHeaderStart = streamCompressor.getTotalBytesWritten();
         final byte[] localHeader = createLocalFileHeader(ze, name, encodable, 
phased, localHeaderStart);
-        offsets.put(ze, localHeaderStart);
+        metaData.put(ze, new EntryMetaData(localHeaderStart, 
usesDataDescriptor(ze.getMethod(), phased)));
         entry.localDataStart = localHeaderStart + LFH_CRC_OFFSET; // At crc 
offset
         writeCounted(localHeader);
         entry.dataStart = streamCompressor.getTotalBytesWritten();
@@ -1070,14 +1072,11 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
 
         //store method in local variable to prevent multiple method calls
         final int zipMethod = ze.getMethod();
+        final boolean dataDescriptor = usesDataDescriptor(zipMethod, phased);
 
-        if (phased &&  !isZip64Required(entry.entry, zip64Mode)){
-            putShort(versionNeededToExtractMethod(zipMethod), buf, 
LFH_VERSION_NEEDED_OFFSET);
-        } else {
-            putShort(versionNeededToExtract(zipMethod, hasZip64Extra(ze)), 
buf, LFH_VERSION_NEEDED_OFFSET);
-        }
+        putShort(versionNeededToExtract(zipMethod, hasZip64Extra(ze), 
dataDescriptor), buf, LFH_VERSION_NEEDED_OFFSET);
 
-        final GeneralPurposeBit generalPurposeBit = 
getGeneralPurposeBits(zipMethod, !encodable && fallbackToUTF8);
+        final GeneralPurposeBit generalPurposeBit = 
getGeneralPurposeBits(!encodable && fallbackToUTF8, dataDescriptor);
         generalPurposeBit.encode(buf, LFH_GPB_OFFSET);
 
         // compression method
@@ -1169,7 +1168,7 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
      * @throws IOException on error
      */
     protected void writeDataDescriptor(final ZipArchiveEntry ze) throws 
IOException {
-        if (ze.getMethod() != DEFLATED || channel != null) {
+        if (!usesDataDescriptor(ze.getMethod(), false)) {
             return;
         }
         writeCounted(DD_SIG);
@@ -1198,11 +1197,11 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
 
     private byte[] createCentralFileHeader(final ZipArchiveEntry ze) throws 
IOException {
 
-        final long lfhOffset = offsets.get(ze);
+        final EntryMetaData entryMetaData = metaData.get(ze);
         final boolean needsZip64Extra = hasZip64Extra(ze)
                 || ze.getCompressedSize() >= ZIP64_MAGIC
                 || ze.getSize() >= ZIP64_MAGIC
-                || lfhOffset >= ZIP64_MAGIC
+                || entryMetaData.offset >= ZIP64_MAGIC
                 || zip64Mode == Zip64Mode.Always;
 
         if (needsZip64Extra && zip64Mode == Zip64Mode.Never) {
@@ -1214,19 +1213,20 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
         }
 
 
-        handleZip64Extra(ze, lfhOffset, needsZip64Extra);
+        handleZip64Extra(ze, entryMetaData.offset, needsZip64Extra);
 
-        return createCentralFileHeader(ze, getName(ze), lfhOffset, 
needsZip64Extra);
+        return createCentralFileHeader(ze, getName(ze), entryMetaData, 
needsZip64Extra);
     }
 
     /**
      * Writes the central file header entry.
      * @param ze the entry to write
      * @param name The encoded name
-     * @param lfhOffset Local file header offset for this file
+     * @param entryMetaData meta data for this file
      * @throws IOException on error
      */
-    private byte[] createCentralFileHeader(final ZipArchiveEntry ze, final 
ByteBuffer name, final long lfhOffset,
+    private byte[] createCentralFileHeader(final ZipArchiveEntry ze, final 
ByteBuffer name,
+                                           final EntryMetaData entryMetaData,
                                            final boolean needsZip64Extra) 
throws IOException {
         final byte[] extra = ze.getCentralDirectoryExtra();
 
@@ -1251,8 +1251,9 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
 
         final int zipMethod = ze.getMethod();
         final boolean encodable = zipEncoding.canEncode(ze.getName());
-        putShort(versionNeededToExtract(zipMethod, needsZip64Extra), buf, 
CFH_VERSION_NEEDED_OFFSET);
-        getGeneralPurposeBits(zipMethod, !encodable && 
fallbackToUTF8).encode(buf, CFH_GPB_OFFSET);
+        putShort(versionNeededToExtract(zipMethod, needsZip64Extra, 
entryMetaData.usesDataDescriptor),
+            buf, CFH_VERSION_NEEDED_OFFSET);
+        getGeneralPurposeBits(!encodable && fallbackToUTF8, 
entryMetaData.usesDataDescriptor).encode(buf, CFH_GPB_OFFSET);
 
         // compression method
         putShort(zipMethod, buf, CFH_METHOD_OFFSET);
@@ -1292,10 +1293,10 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
         putLong(ze.getExternalAttributes(), buf, 
CFH_EXTERNAL_ATTRIBUTES_OFFSET);
 
         // relative offset of LFH
-        if (lfhOffset >= ZIP64_MAGIC || zip64Mode == Zip64Mode.Always) {
+        if (entryMetaData.offset >= ZIP64_MAGIC || zip64Mode == 
Zip64Mode.Always) {
             putLong(ZIP64_MAGIC, buf, CFH_LFH_OFFSET);
         } else {
-            putLong(Math.min(lfhOffset, ZIP64_MAGIC), buf, CFH_LFH_OFFSET);
+            putLong(Math.min(entryMetaData.offset, ZIP64_MAGIC), buf, 
CFH_LFH_OFFSET);
         }
 
         // file name
@@ -1469,28 +1470,27 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
     }
 
 
-    private GeneralPurposeBit getGeneralPurposeBits(final int zipMethod, final 
boolean utfFallback) {
+    private GeneralPurposeBit getGeneralPurposeBits(final boolean utfFallback, 
boolean usesDataDescriptor) {
         final GeneralPurposeBit b = new GeneralPurposeBit();
         b.useUTF8ForNames(useUTF8Flag || utfFallback);
-        if (isDeflatedToOutputStream(zipMethod)) {
+        if (usesDataDescriptor) {
             b.useDataDescriptor(true);
         }
         return b;
     }
 
-    private int versionNeededToExtract(final int zipMethod, final boolean 
zip64) {
+    private int versionNeededToExtract(final int zipMethod, final boolean 
zip64, final boolean usedDataDescriptor) {
         if (zip64) {
             return ZIP64_MIN_VERSION;
         }
-        // requires version 2 as we are going to store length info
-        // in the data descriptor
-        return isDeflatedToOutputStream(zipMethod)
-            ? DATA_DESCRIPTOR_MIN_VERSION
-            : versionNeededToExtractMethod(zipMethod);
+        if (usedDataDescriptor) {
+            return DATA_DESCRIPTOR_MIN_VERSION;
+        }
+        return versionNeededToExtractMethod(zipMethod);
     }
 
-    private boolean isDeflatedToOutputStream(final int zipMethod) {
-        return zipMethod == DEFLATED && channel == null;
+    private boolean usesDataDescriptor(final int zipMethod, boolean phased) {
+        return !phased && zipMethod == DEFLATED && channel == null;
     }
 
     private int versionNeededToExtractMethod(int zipMethod) {
@@ -1675,4 +1675,12 @@ public class ZipArchiveOutputStream extends 
ArchiveOutputStream {
         private boolean hasWritten;
     }
 
+    private static final class EntryMetaData {
+        private final long offset;
+        private final boolean usesDataDescriptor;
+        private EntryMetaData(long offset, boolean usesDataDescriptor) {
+            this.offset = offset;
+            this.usesDataDescriptor = usesDataDescriptor;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d75b8985/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java
 
b/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java
new file mode 100644
index 0000000..52078de
--- /dev/null
+++ 
b/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java
@@ -0,0 +1,189 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+package org.apache.commons.compress.archivers.zip;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.commons.compress.utils.IOUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.apache.commons.compress.AbstractTestCase.mkdir;
+import static org.apache.commons.compress.AbstractTestCase.rmdir;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+public class DataDescriptorTest {
+
+    private File dir;
+
+    @Before
+    public void setUp() throws Exception {
+        dir = mkdir("ddtest");
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        rmdir(dir);
+    }
+
+    @Test
+    public void writesDataDescriptorForDeflatedEntryOnUnseekableOutput() 
throws IOException {
+        ByteArrayOutputStream o = new ByteArrayOutputStream();
+        try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(o)) {
+            zos.putArchiveEntry(new ZipArchiveEntry("test1.txt"));
+            zos.write("foo".getBytes("UTF-8"));
+            zos.closeArchiveEntry();
+        }
+        byte[] data = o.toByteArray();
+
+        byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6);
+        // 2.0 because of DD
+        assertArrayEquals(new byte[] { 20, 0 }, versionInLFH);
+        byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8);
+        // DD and EFS flags
+        assertArrayEquals(new byte[] { 8, 8 }, gpbInLFH);
+        byte[] crcAndSizedInLFH = Arrays.copyOfRange(data, 14, 26);
+        assertArrayEquals(new byte[12], crcAndSizedInLFH);
+
+        int cdhStart = findCentralDirectory(data);
+        byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart 
+ 8);
+        assertArrayEquals(new byte[] { 20, 0 }, versionInCDH);
+        byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 
10);
+        assertArrayEquals(new byte[] { 8, 8 }, gpbInCDH);
+
+        int ddStart = cdhStart - 16;
+        assertEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart));
+        long crcFromDD = ZipLong.getValue(data, ddStart + 4);
+        long cSizeFromDD = ZipLong.getValue(data, ddStart + 8);
+        long sizeFromDD = ZipLong.getValue(data, ddStart + 12);
+        assertEquals(3, sizeFromDD);
+
+        long crcFromCDH = ZipLong.getValue(data, cdhStart + 16);
+        assertEquals(crcFromDD, crcFromCDH);
+        long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20);
+        assertEquals(cSizeFromDD, cSizeFromCDH);
+        long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24);
+        assertEquals(sizeFromDD, sizeFromCDH);
+    }
+
+    @Test
+    public void doesntWriteDataDescriptorForDeflatedEntryOnSeekableOutput() 
throws IOException {
+        File f = new File(dir, "test.zip");
+        try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(f)) {
+            zos.putArchiveEntry(new ZipArchiveEntry("test1.txt"));
+            zos.write("foo".getBytes("UTF-8"));
+            zos.closeArchiveEntry();
+        }
+
+        byte[] data;
+        try (FileInputStream fis = new FileInputStream(f)) {
+            data = IOUtils.toByteArray(fis);
+        }
+
+        byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6);
+        // still 2.0 because of Deflate
+        assertArrayEquals(new byte[] { 20, 0 }, versionInLFH);
+        byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8);
+        // no DD but EFS flag
+        assertArrayEquals(new byte[] { 0, 8 }, gpbInLFH);
+
+        int cdhStart = findCentralDirectory(data);
+        byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart 
+ 8);
+        assertArrayEquals(new byte[] { 20, 0 }, versionInCDH);
+        byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 
10);
+        assertArrayEquals(new byte[] { 0, 8 }, gpbInCDH);
+
+        int ddStart = cdhStart - 16;
+        assertNotEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart));
+        long crcFromLFH = ZipLong.getValue(data, 14);
+        long cSizeFromLFH = ZipLong.getValue(data, 18);
+        long sizeFromLFH = ZipLong.getValue(data, 22);
+        assertEquals(3, sizeFromLFH);
+
+        long crcFromCDH = ZipLong.getValue(data, cdhStart + 16);
+        assertEquals(crcFromLFH, crcFromCDH);
+        long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20);
+        assertEquals(cSizeFromLFH, cSizeFromCDH);
+        long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24);
+        assertEquals(sizeFromLFH, sizeFromCDH);
+    }
+
+    @Test
+    public void doesntWriteDataDescriptorWhenAddingRawEntries() throws 
IOException {
+        ByteArrayOutputStream init = new ByteArrayOutputStream();
+        try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(init)) {
+            zos.putArchiveEntry(new ZipArchiveEntry("test1.txt"));
+            zos.write("foo".getBytes("UTF-8"));
+            zos.closeArchiveEntry();
+        }
+
+        File f = new File(dir, "test.zip");
+        try (FileOutputStream fos = new FileOutputStream(f)) {
+            fos.write(init.toByteArray());
+        }
+
+        ByteArrayOutputStream o = new ByteArrayOutputStream();
+        ZipArchiveEntry zae;
+        try (ZipFile zf = new ZipFile(f);
+             ZipArchiveOutputStream zos = new ZipArchiveOutputStream(o)) {
+            zae = zf.getEntry("test1.txt");
+            zos.addRawArchiveEntry(zae, zf.getRawInputStream(zae));
+        }
+
+        byte[] data = o.toByteArray();
+        byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6);
+        // still 2.0 because of Deflate
+        assertArrayEquals(new byte[] { 20, 0 }, versionInLFH);
+        byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8);
+        // no DD but EFS flag
+        assertArrayEquals(new byte[] { 0, 8 }, gpbInLFH);
+
+        int cdhStart = findCentralDirectory(data);
+        byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart 
+ 8);
+        assertArrayEquals(new byte[] { 20, 0 }, versionInCDH);
+        byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 
10);
+        assertArrayEquals(new byte[] { 0, 8 }, gpbInCDH);
+
+        int ddStart = cdhStart - 16;
+        assertNotEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart));
+        long crcFromLFH = ZipLong.getValue(data, 14);
+        long cSizeFromLFH = ZipLong.getValue(data, 18);
+        long sizeFromLFH = ZipLong.getValue(data, 22);
+        assertEquals(3, sizeFromLFH);
+
+        long crcFromCDH = ZipLong.getValue(data, cdhStart + 16);
+        assertEquals(crcFromLFH, crcFromCDH);
+        long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20);
+        assertEquals(cSizeFromLFH, cSizeFromCDH);
+        long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24);
+        assertEquals(sizeFromLFH, sizeFromCDH);
+    }
+
+    private int findCentralDirectory(byte[] data) {
+        // not a ZIP64 archive, no comment, "End of central directory record" 
at the end
+        return (int) ZipLong.getValue(data, data.length - 22 + 16);
+    }
+}

Reply via email to