COMPRESS-394/395 use same "version needed to extract" in LFH and CDH
... and skip data descriptor when copying raw entries Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/d75b8985 Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/d75b8985 Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/d75b8985 Branch: refs/heads/master Commit: d75b89856ef29bbec6784cd6aa714eafe0eeb312 Parents: 12edac7 Author: Stefan Bodewig <bode...@apache.org> Authored: Mon May 22 18:11:06 2017 +0200 Committer: Stefan Bodewig <bode...@apache.org> Committed: Mon May 22 18:11:06 2017 +0200 ---------------------------------------------------------------------- src/changes/changes.xml | 9 + .../archivers/zip/ZipArchiveOutputStream.java | 74 ++++---- .../archivers/zip/DataDescriptorTest.java | 189 +++++++++++++++++++ 3 files changed, 239 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d75b8985/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 203a2fd..3a1b513 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -44,6 +44,15 @@ The <action> type attribute can be add,update,fix,remove. <body> <release version="1.15" date="not released, yet" description="Release 1.15"> + <action issue="COMPRESS-394" type="fix" date="2017-05-22"> + Make sure "version needed to extract" in local file header and + central directory of a ZIP archive agree with each other. + Also ensure the version is set to 2.0 if DEFLATE is used. + </action> + <action issue="COMPRESS-395" type="fix" date="2017-05-22"> + Don't use a data descriptor in ZIP archives when copying a raw + entry that already knows its size and CRC information. + </action> </release> <release version="1.14" date="2017-05-14" description="Release 1.14"> http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d75b8985/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java index 077344d..63aeba7 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java @@ -202,9 +202,9 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { private static final byte[] ONE = ZipLong.getBytes(1L); /** - * Holds the offsets of the LFH starts for each entry. + * Holds some book-keeping data for each entry. */ - private final Map<ZipArchiveEntry, Long> offsets = + private final Map<ZipArchiveEntry, EntryMetaData> metaData = new HashMap<>(); /** @@ -473,7 +473,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { cdLength = streamCompressor.getTotalBytesWritten() - cdOffset; writeZip64CentralDirectory(); writeCentralDirectoryEnd(); - offsets.clear(); + metaData.clear(); entries.clear(); streamCompressor.close(); finished = true; @@ -539,7 +539,9 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { rewriteSizesAndCrc(actuallyNeedsZip64); } - writeDataDescriptor(entry.entry); + if (!phased) { + writeDataDescriptor(entry.entry); + } entry = null; } @@ -695,7 +697,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { // do some cleanup: // * rewrite version needed to extract channel.position(entry.localDataStart - 5 * SHORT); - writeOut(ZipShort.getBytes(versionNeededToExtractMethod(entry.entry.getMethod()))); + writeOut(ZipShort.getBytes(versionNeededToExtract(entry.entry.getMethod(), false, false))); // * remove ZIP64 extra so it doesn't get written // to the central directory @@ -1029,7 +1031,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { final long localHeaderStart = streamCompressor.getTotalBytesWritten(); final byte[] localHeader = createLocalFileHeader(ze, name, encodable, phased, localHeaderStart); - offsets.put(ze, localHeaderStart); + metaData.put(ze, new EntryMetaData(localHeaderStart, usesDataDescriptor(ze.getMethod(), phased))); entry.localDataStart = localHeaderStart + LFH_CRC_OFFSET; // At crc offset writeCounted(localHeader); entry.dataStart = streamCompressor.getTotalBytesWritten(); @@ -1070,14 +1072,11 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { //store method in local variable to prevent multiple method calls final int zipMethod = ze.getMethod(); + final boolean dataDescriptor = usesDataDescriptor(zipMethod, phased); - if (phased && !isZip64Required(entry.entry, zip64Mode)){ - putShort(versionNeededToExtractMethod(zipMethod), buf, LFH_VERSION_NEEDED_OFFSET); - } else { - putShort(versionNeededToExtract(zipMethod, hasZip64Extra(ze)), buf, LFH_VERSION_NEEDED_OFFSET); - } + putShort(versionNeededToExtract(zipMethod, hasZip64Extra(ze), dataDescriptor), buf, LFH_VERSION_NEEDED_OFFSET); - final GeneralPurposeBit generalPurposeBit = getGeneralPurposeBits(zipMethod, !encodable && fallbackToUTF8); + final GeneralPurposeBit generalPurposeBit = getGeneralPurposeBits(!encodable && fallbackToUTF8, dataDescriptor); generalPurposeBit.encode(buf, LFH_GPB_OFFSET); // compression method @@ -1169,7 +1168,7 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { * @throws IOException on error */ protected void writeDataDescriptor(final ZipArchiveEntry ze) throws IOException { - if (ze.getMethod() != DEFLATED || channel != null) { + if (!usesDataDescriptor(ze.getMethod(), false)) { return; } writeCounted(DD_SIG); @@ -1198,11 +1197,11 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { private byte[] createCentralFileHeader(final ZipArchiveEntry ze) throws IOException { - final long lfhOffset = offsets.get(ze); + final EntryMetaData entryMetaData = metaData.get(ze); final boolean needsZip64Extra = hasZip64Extra(ze) || ze.getCompressedSize() >= ZIP64_MAGIC || ze.getSize() >= ZIP64_MAGIC - || lfhOffset >= ZIP64_MAGIC + || entryMetaData.offset >= ZIP64_MAGIC || zip64Mode == Zip64Mode.Always; if (needsZip64Extra && zip64Mode == Zip64Mode.Never) { @@ -1214,19 +1213,20 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { } - handleZip64Extra(ze, lfhOffset, needsZip64Extra); + handleZip64Extra(ze, entryMetaData.offset, needsZip64Extra); - return createCentralFileHeader(ze, getName(ze), lfhOffset, needsZip64Extra); + return createCentralFileHeader(ze, getName(ze), entryMetaData, needsZip64Extra); } /** * Writes the central file header entry. * @param ze the entry to write * @param name The encoded name - * @param lfhOffset Local file header offset for this file + * @param entryMetaData meta data for this file * @throws IOException on error */ - private byte[] createCentralFileHeader(final ZipArchiveEntry ze, final ByteBuffer name, final long lfhOffset, + private byte[] createCentralFileHeader(final ZipArchiveEntry ze, final ByteBuffer name, + final EntryMetaData entryMetaData, final boolean needsZip64Extra) throws IOException { final byte[] extra = ze.getCentralDirectoryExtra(); @@ -1251,8 +1251,9 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { final int zipMethod = ze.getMethod(); final boolean encodable = zipEncoding.canEncode(ze.getName()); - putShort(versionNeededToExtract(zipMethod, needsZip64Extra), buf, CFH_VERSION_NEEDED_OFFSET); - getGeneralPurposeBits(zipMethod, !encodable && fallbackToUTF8).encode(buf, CFH_GPB_OFFSET); + putShort(versionNeededToExtract(zipMethod, needsZip64Extra, entryMetaData.usesDataDescriptor), + buf, CFH_VERSION_NEEDED_OFFSET); + getGeneralPurposeBits(!encodable && fallbackToUTF8, entryMetaData.usesDataDescriptor).encode(buf, CFH_GPB_OFFSET); // compression method putShort(zipMethod, buf, CFH_METHOD_OFFSET); @@ -1292,10 +1293,10 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { putLong(ze.getExternalAttributes(), buf, CFH_EXTERNAL_ATTRIBUTES_OFFSET); // relative offset of LFH - if (lfhOffset >= ZIP64_MAGIC || zip64Mode == Zip64Mode.Always) { + if (entryMetaData.offset >= ZIP64_MAGIC || zip64Mode == Zip64Mode.Always) { putLong(ZIP64_MAGIC, buf, CFH_LFH_OFFSET); } else { - putLong(Math.min(lfhOffset, ZIP64_MAGIC), buf, CFH_LFH_OFFSET); + putLong(Math.min(entryMetaData.offset, ZIP64_MAGIC), buf, CFH_LFH_OFFSET); } // file name @@ -1469,28 +1470,27 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { } - private GeneralPurposeBit getGeneralPurposeBits(final int zipMethod, final boolean utfFallback) { + private GeneralPurposeBit getGeneralPurposeBits(final boolean utfFallback, boolean usesDataDescriptor) { final GeneralPurposeBit b = new GeneralPurposeBit(); b.useUTF8ForNames(useUTF8Flag || utfFallback); - if (isDeflatedToOutputStream(zipMethod)) { + if (usesDataDescriptor) { b.useDataDescriptor(true); } return b; } - private int versionNeededToExtract(final int zipMethod, final boolean zip64) { + private int versionNeededToExtract(final int zipMethod, final boolean zip64, final boolean usedDataDescriptor) { if (zip64) { return ZIP64_MIN_VERSION; } - // requires version 2 as we are going to store length info - // in the data descriptor - return isDeflatedToOutputStream(zipMethod) - ? DATA_DESCRIPTOR_MIN_VERSION - : versionNeededToExtractMethod(zipMethod); + if (usedDataDescriptor) { + return DATA_DESCRIPTOR_MIN_VERSION; + } + return versionNeededToExtractMethod(zipMethod); } - private boolean isDeflatedToOutputStream(final int zipMethod) { - return zipMethod == DEFLATED && channel == null; + private boolean usesDataDescriptor(final int zipMethod, boolean phased) { + return !phased && zipMethod == DEFLATED && channel == null; } private int versionNeededToExtractMethod(int zipMethod) { @@ -1675,4 +1675,12 @@ public class ZipArchiveOutputStream extends ArchiveOutputStream { private boolean hasWritten; } + private static final class EntryMetaData { + private final long offset; + private final boolean usesDataDescriptor; + private EntryMetaData(long offset, boolean usesDataDescriptor) { + this.offset = offset; + this.usesDataDescriptor = usesDataDescriptor; + } + } } http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d75b8985/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java new file mode 100644 index 0000000..52078de --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/DataDescriptorTest.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.zip; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Arrays; + +import org.apache.commons.compress.utils.IOUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.commons.compress.AbstractTestCase.mkdir; +import static org.apache.commons.compress.AbstractTestCase.rmdir; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +public class DataDescriptorTest { + + private File dir; + + @Before + public void setUp() throws Exception { + dir = mkdir("ddtest"); + } + + @After + public void tearDown() throws Exception { + rmdir(dir); + } + + @Test + public void writesDataDescriptorForDeflatedEntryOnUnseekableOutput() throws IOException { + ByteArrayOutputStream o = new ByteArrayOutputStream(); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(o)) { + zos.putArchiveEntry(new ZipArchiveEntry("test1.txt")); + zos.write("foo".getBytes("UTF-8")); + zos.closeArchiveEntry(); + } + byte[] data = o.toByteArray(); + + byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6); + // 2.0 because of DD + assertArrayEquals(new byte[] { 20, 0 }, versionInLFH); + byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8); + // DD and EFS flags + assertArrayEquals(new byte[] { 8, 8 }, gpbInLFH); + byte[] crcAndSizedInLFH = Arrays.copyOfRange(data, 14, 26); + assertArrayEquals(new byte[12], crcAndSizedInLFH); + + int cdhStart = findCentralDirectory(data); + byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart + 8); + assertArrayEquals(new byte[] { 20, 0 }, versionInCDH); + byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 10); + assertArrayEquals(new byte[] { 8, 8 }, gpbInCDH); + + int ddStart = cdhStart - 16; + assertEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart)); + long crcFromDD = ZipLong.getValue(data, ddStart + 4); + long cSizeFromDD = ZipLong.getValue(data, ddStart + 8); + long sizeFromDD = ZipLong.getValue(data, ddStart + 12); + assertEquals(3, sizeFromDD); + + long crcFromCDH = ZipLong.getValue(data, cdhStart + 16); + assertEquals(crcFromDD, crcFromCDH); + long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20); + assertEquals(cSizeFromDD, cSizeFromCDH); + long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24); + assertEquals(sizeFromDD, sizeFromCDH); + } + + @Test + public void doesntWriteDataDescriptorForDeflatedEntryOnSeekableOutput() throws IOException { + File f = new File(dir, "test.zip"); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(f)) { + zos.putArchiveEntry(new ZipArchiveEntry("test1.txt")); + zos.write("foo".getBytes("UTF-8")); + zos.closeArchiveEntry(); + } + + byte[] data; + try (FileInputStream fis = new FileInputStream(f)) { + data = IOUtils.toByteArray(fis); + } + + byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6); + // still 2.0 because of Deflate + assertArrayEquals(new byte[] { 20, 0 }, versionInLFH); + byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8); + // no DD but EFS flag + assertArrayEquals(new byte[] { 0, 8 }, gpbInLFH); + + int cdhStart = findCentralDirectory(data); + byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart + 8); + assertArrayEquals(new byte[] { 20, 0 }, versionInCDH); + byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 10); + assertArrayEquals(new byte[] { 0, 8 }, gpbInCDH); + + int ddStart = cdhStart - 16; + assertNotEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart)); + long crcFromLFH = ZipLong.getValue(data, 14); + long cSizeFromLFH = ZipLong.getValue(data, 18); + long sizeFromLFH = ZipLong.getValue(data, 22); + assertEquals(3, sizeFromLFH); + + long crcFromCDH = ZipLong.getValue(data, cdhStart + 16); + assertEquals(crcFromLFH, crcFromCDH); + long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20); + assertEquals(cSizeFromLFH, cSizeFromCDH); + long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24); + assertEquals(sizeFromLFH, sizeFromCDH); + } + + @Test + public void doesntWriteDataDescriptorWhenAddingRawEntries() throws IOException { + ByteArrayOutputStream init = new ByteArrayOutputStream(); + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(init)) { + zos.putArchiveEntry(new ZipArchiveEntry("test1.txt")); + zos.write("foo".getBytes("UTF-8")); + zos.closeArchiveEntry(); + } + + File f = new File(dir, "test.zip"); + try (FileOutputStream fos = new FileOutputStream(f)) { + fos.write(init.toByteArray()); + } + + ByteArrayOutputStream o = new ByteArrayOutputStream(); + ZipArchiveEntry zae; + try (ZipFile zf = new ZipFile(f); + ZipArchiveOutputStream zos = new ZipArchiveOutputStream(o)) { + zae = zf.getEntry("test1.txt"); + zos.addRawArchiveEntry(zae, zf.getRawInputStream(zae)); + } + + byte[] data = o.toByteArray(); + byte[] versionInLFH = Arrays.copyOfRange(data, 4, 6); + // still 2.0 because of Deflate + assertArrayEquals(new byte[] { 20, 0 }, versionInLFH); + byte[] gpbInLFH = Arrays.copyOfRange(data, 6, 8); + // no DD but EFS flag + assertArrayEquals(new byte[] { 0, 8 }, gpbInLFH); + + int cdhStart = findCentralDirectory(data); + byte[] versionInCDH = Arrays.copyOfRange(data, cdhStart + 6, cdhStart + 8); + assertArrayEquals(new byte[] { 20, 0 }, versionInCDH); + byte[] gpbInCDH = Arrays.copyOfRange(data, cdhStart + 8, cdhStart + 10); + assertArrayEquals(new byte[] { 0, 8 }, gpbInCDH); + + int ddStart = cdhStart - 16; + assertNotEquals(ZipLong.DD_SIG, new ZipLong(data, ddStart)); + long crcFromLFH = ZipLong.getValue(data, 14); + long cSizeFromLFH = ZipLong.getValue(data, 18); + long sizeFromLFH = ZipLong.getValue(data, 22); + assertEquals(3, sizeFromLFH); + + long crcFromCDH = ZipLong.getValue(data, cdhStart + 16); + assertEquals(crcFromLFH, crcFromCDH); + long cSizeFromCDH = ZipLong.getValue(data, cdhStart + 20); + assertEquals(cSizeFromLFH, cSizeFromCDH); + long sizeFromCDH = ZipLong.getValue(data, cdhStart + 24); + assertEquals(sizeFromLFH, sizeFromCDH); + } + + private int findCentralDirectory(byte[] data) { + // not a ZIP64 archive, no comment, "End of central directory record" at the end + return (int) ZipLong.getValue(data, data.length - 22 + 16); + } +}