Author: bodewig
Date: Thu Aug 11 14:08:54 2011
New Revision: 1156621
URL: http://svn.apache.org/viewvc?rev=1156621&view=rev
Log:
a few 'extract method' refactorings to make code more readable, still won't win
a beauty-contest
Modified:
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
Modified:
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java
URL:
http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java?rev=1156621&r1=1156620&r2=1156621&view=diff
==============================================================================
---
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java
(original)
+++
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java
Thu Aug 11 14:08:54 2011
@@ -39,12 +39,17 @@ import static org.apache.commons.compres
/**
* Implements an input stream that can read Zip archives.
- * <p>
- * Note that {@link ZipArchiveEntry#getSize()} may return -1 if the DEFLATE
algorithm is used, as the size information
- * is not available from the header.
- * <p>
- * The {@link ZipFile} class is preferred when reading from files.
- *
+ *
+ * <p>Note that {@link ZipArchiveEntry#getSize()} may return -1 if the
+ * DEFLATE algorithm is used, as the size information is not available
+ * from the header.</p>
+ *
+ * <p>The {@link ZipFile} class is preferred when reading from files.</p>
+ *
+ * <p>As of Apache Commons Compress it transparently supports Zip64
+ * extensions and thus individual entries and archives larger than 4
+ * GB or with more than 65536 entries.</p>
+ *
* @see ZipFile
* @NotThreadSafe
*/
@@ -238,7 +243,17 @@ public class ZipArchiveInputStream exten
null);
}
- Zip64ExtendedInformationExtraField z64 =
+ processZip64Extra(size, cSize);
+ return current.entry;
+ }
+
+ /**
+ * Records whether a Zip64 extra is present and sets the size
+ * information from it if sizes are 0xFFFFFFFF and the entry
+ * doesn't use a data descriptor.
+ */
+ private void processZip64Extra(ZipLong size, ZipLong cSize) {
+ Zip64ExtendedInformationExtraField z64 =
(Zip64ExtendedInformationExtraField)
current.entry.getExtraField(Zip64ExtendedInformationExtraField
.HEADER_ID);
@@ -255,7 +270,6 @@ public class ZipArchiveInputStream exten
current.entry.setSize(size.getValue());
}
}
- return current.entry;
}
/** {@inheritDoc} */
@@ -303,6 +317,18 @@ public class ZipArchiveInputStream exten
}
if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
+ return readStored(buffer, start, length);
+ }
+ return readDeflated(buffer, start, length);
+ }
+ throw new ArrayIndexOutOfBoundsException();
+ }
+
+ /**
+ * Implementation of read for STORED entries.
+ */
+ private int readStored(byte[] buffer, int start, int length)
+ throws IOException {
if (current.hasDataDescriptor) {
if (lastStoredEntry == null) {
readStoredEntry();
@@ -335,8 +361,13 @@ public class ZipArchiveInputStream exten
current.bytesRead += toRead;
crc.update(buffer, start, toRead);
return toRead;
- }
+ }
+ /**
+ * Implementation of read for DEFLATED entries.
+ */
+ private int readDeflated(byte[] buffer, int start, int length)
+ throws IOException {
if (inf.needsInput()) {
fill();
if (buf.lengthOfLastRead > 0) {
@@ -358,8 +389,6 @@ public class ZipArchiveInputStream exten
}
crc.update(buffer, start, read);
return read;
- }
- throw new ArrayIndexOutOfBoundsException();
}
@Override
@@ -376,7 +405,7 @@ public class ZipArchiveInputStream exten
* stream.
*
* <p>This implementation may end up skipping over some smaller
- * number of bytes, possibly 0, if an only if it reaches the end
+ * number of bytes, possibly 0, if and only if it reaches the end
* of the underlying stream.</p>
*
* <p>The actual number of bytes skipped is returned.</p>
@@ -430,7 +459,7 @@ public class ZipArchiveInputStream exten
return false;
}
}
- return true;
+ return true;
}
/**
@@ -462,47 +491,13 @@ public class ZipArchiveInputStream exten
// Ensure all entry bytes are read
if (current.bytesReadFromStream <= current.entry.getCompressedSize()
&& !current.hasDataDescriptor) {
- long remaining = current.entry.getCompressedSize()
- - current.bytesReadFromStream;
- while (remaining > 0) {
- long n = in.read(buf.buf, 0, (int) Math.min(buf.buf.length,
- remaining));
- if (n < 0) {
- throw new EOFException(
- "Truncated ZIP entry: " + current.entry.getName());
- } else {
- count(n);
- remaining -= n;
- }
- }
+ drainCurrentEntryData();
} else {
skip(Long.MAX_VALUE);
- long inB;
- if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
- inB = inf.getBytesRead();
- /* for Java < Java7 the getBytes* methods in
- * Inflater/Deflater seem to return unsigned ints
- * rather than longs that start over with 0 at 2^32.
- *
- * The stream knows how many bytes it has read, but
- * not how many the Inflater actually consumed - it
- * should be between the total number of bytes read
- * for the entry and the total number minus the last
- * read operation. Here we just try to make the value
- * close enough to the bytes we've read by assuming
- * the number of bytes consumed must be smaller than
- * (or equal to) the number of bytes read but not
- * smaller by more than 2^32.
- */
- if (current.bytesReadFromStream >= TWO_EXP_32) {
- while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
- inB += TWO_EXP_32;
- }
- }
- } else {
- inB = current.bytesRead;
- }
+ long inB =
+ current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED
+ ? getBytesInflated() : current.bytesRead;
// this is at most a single read() operation and can't
// exceed the range of int
@@ -525,6 +520,51 @@ public class ZipArchiveInputStream exten
lastStoredEntry = null;
}
+ /**
+ * Read all data of the current entry from the underlying stream
+ * that hasn't been read, yet.
+ */
+ private void drainCurrentEntryData() throws IOException {
+ long remaining = current.entry.getCompressedSize()
+ - current.bytesReadFromStream;
+ while (remaining > 0) {
+ long n = in.read(buf.buf, 0, (int) Math.min(buf.buf.length,
+ remaining));
+ if (n < 0) {
+ throw new EOFException(
+ "Truncated ZIP entry: " + current.entry.getName());
+ } else {
+ count(n);
+ remaining -= n;
+ }
+ }
+ }
+
+ /**
+ * Get the number of bytes Inflater has actually processed.
+ *
+ * <p>For Java &lt; Java7 the getBytes* methods in
+ * Inflater/Deflater seem to return unsigned ints rather than
+ * longs that start over with 0 at 2^32.</p>
+ *
+ * <p>The stream knows how many bytes it has read, but not how
+ * many the Inflater actually consumed - it should be between the
+ * total number of bytes read for the entry and the total number
+ * minus the last read operation. Here we just try to make the
+ * value close enough to the bytes we've read by assuming the
+ * number of bytes consumed must be smaller than (or equal to) the
+ * number of bytes read but not smaller by more than 2^32.</p>
+ */
+ private long getBytesInflated() {
+ long inB = inf.getBytesRead();
+ if (current.bytesReadFromStream >= TWO_EXP_32) {
+ while (inB + TWO_EXP_32 <= current.bytesReadFromStream) {
+ inB += TWO_EXP_32;
+ }
+ }
+ return inB;
+ }
+
private void fill() throws IOException {
if (closed) {
throw new IOException("The stream is closed");
@@ -615,9 +655,6 @@ public class ZipArchiveInputStream exten
*/
private void readStoredEntry() throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
- byte[] LFH = ZipLong.LFH_SIG.getBytes();
- byte[] CFH = ZipLong.CFH_SIG.getBytes();
- byte[] DD = ZipLong.DD_SIG.getBytes();
int off = 0;
boolean done = false;
@@ -638,6 +675,32 @@ public class ZipArchiveInputStream exten
continue;
}
+ done = bufferContainsSignature(bos, off, r, ddLen);
+ if (!done) {
+ off = cacheBytesRead(bos, off, r, ddLen);
+ }
+ }
+
+ byte[] b = bos.toByteArray();
+ lastStoredEntry = new ByteArrayInputStream(b);
+ }
+
+ private static final byte[] LFH = ZipLong.LFH_SIG.getBytes();
+ private static final byte[] CFH = ZipLong.CFH_SIG.getBytes();
+ private static final byte[] DD = ZipLong.DD_SIG.getBytes();
+
+ /**
+ * Checks whether the current buffer contains the signature of a
+ * "data descriptor", "local file header" or
+ * "central directory entry".
+ *
+ * <p>If it contains such a signature, reads the data descriptor
+ * and positions the stream right after the data descriptor.</p>
+ */
+ private boolean bufferContainsSignature(ByteArrayOutputStream bos,
+ int off, int r, int ddLen)
+ throws IOException {
+ boolean done = false;
int readTooMuch = 0;
for (int i = 0; !done && i < r - 4; i++) {
if (buf.buf[i] == LFH[0] && buf.buf[i + 1] == LFH[1]) {
@@ -663,25 +726,31 @@ public class ZipArchiveInputStream exten
}
}
}
- if (!done) {
- // worst case we've read a data descriptor without a
- // signature (up to 20 bytes) plus the first three bytes of
- // a LFH or CFH signature
- // save the last ddLen + 3 bytes in the buffer, cache
- // anything in front of that, read on
- if (off + r > ddLen + 3) {
- bos.write(buf.buf, 0, off + r - ddLen - 3);
- System.arraycopy(buf.buf, off + r - ddLen - 3, buf.buf, 0,
- ddLen + 3);
- off = ddLen + 3;
- } else {
- off += r;
- }
- }
- }
+ return done;
+ }
- byte[] b = bos.toByteArray();
- lastStoredEntry = new ByteArrayInputStream(b);
+ /**
+ * If the last read bytes could hold a data descriptor and an
+ * incomplete signature then save the last bytes to the front of
+ * the buffer and cache everything in front of the potential data
+ * descriptor into the given ByteArrayOutputStream.
+ *
+ * <p>Data descriptor plus incomplete signature (3 bytes in the
+ * worst case) can be 20 bytes max.</p>
+ */
+ private int cacheBytesRead(ByteArrayOutputStream bos, int offset,
+ int lastRead, int expecteDDLen)
+ throws IOException {
+ final int cacheable = offset + lastRead - expecteDDLen - 3;
+ if (cacheable > 0) {
+ bos.write(buf.buf, 0, cacheable);
+ System.arraycopy(buf.buf, cacheable, buf.buf, 0,
+ expecteDDLen + 3);
+ offset = expecteDDLen + 3;
+ } else {
+ offset += lastRead;
+ }
+ return offset;
}
private void pushback(byte[] buf, int offset, int length)
Modified:
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
URL:
http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java?rev=1156621&r1=1156620&r2=1156621&view=diff
==============================================================================
---
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
(original)
+++
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java
Thu Aug 11 14:08:54 2011
@@ -62,6 +62,14 @@ import static org.apache.commons.compres
* the {@link #STORED STORED} method, here setting the CRC and
* uncompressed size information is required before {@link
* #putArchiveEntry(ArchiveEntry)} can be called.</p>
+ *
+ * <p>As of Apache Commons Compress it transparently supports Zip64
+ * extensions and thus individual entries and archives larger than 4
+ * GB or with more than 65536 entries in most cases but explicit
+ * control is provided via {@link #setUseZip64}. If the stream cannot
+ * use RandomAccessFile and you try to write a ZipArchiveEntry of
+ * unknown size then Zip64 extensions will be disabled by default.</p>
+ *
* @NotThreadSafe
*/
public class ZipArchiveOutputStream extends ArchiveOutputStream {
@@ -394,6 +402,7 @@ public class ZipArchiveOutputStream exte
* {@inheritDoc}
* @throws Zip64RequiredException if the archive's size exceeds 4
* GByte or there are more than 65535 entries inside the archive
+ * and {@link #setUseZip64} is {@link Zip64Mode#Never}.
*/
@Override
public void finish() throws IOException {
@@ -422,7 +431,8 @@ public class ZipArchiveOutputStream exte
* Writes all necessary data for this entry.
* @throws IOException on error
* @throws Zip64RequiredException if the entry's uncompressed or
- * compressed size exceeds 4 GByte
+ * compressed size exceeds 4 GByte and
+ * {@link #setUseZip64} is {@link Zip64Mode#Never}.
*/
@Override
public void closeArchiveEntry() throws IOException {
@@ -438,18 +448,45 @@ public class ZipArchiveOutputStream exte
write(new byte[0], 0, 0);
}
+ flushDeflater();
+
+ final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry);
+ long bytesWritten = written - entry.dataStart;
+ long realCrc = crc.getValue();
+ crc.reset();
+
+ final boolean actuallyNeedsZip64 =
+ handleSizesAndCrc(bytesWritten, realCrc, effectiveMode);
+
+ if (raf != null) {
+ rewriteSizesAndCrc(actuallyNeedsZip64);
+ }
+
+ writeDataDescriptor(entry.entry);
+ entry = null;
+ }
+
+ /**
+ * Ensures all bytes sent to the deflater are written to the stream.
+ */
+ private void flushDeflater() throws IOException {
if (entry.entry.getMethod() == DEFLATED) {
def.finish();
while (!def.finished()) {
deflate();
}
}
+ }
- final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry);
- long bytesWritten = written - entry.dataStart;
- long realCrc = crc.getValue();
- crc.reset();
-
+ /**
+ * Ensures the current entry's size and CRC information is set to
+ * the values just written, verifies it isn't too big in the
+ * Zip64Mode.Never case and returns whether the entry would
+ * require a Zip64 extra field.
+ */
+ private boolean handleSizesAndCrc(long bytesWritten, long crc,
+ Zip64Mode effectiveMode)
+ throws ZipException {
if (entry.entry.getMethod() == DEFLATED) {
/* It turns out def.getBytesRead() returns wrong values if
* the size exceeds 4 GB on Java < Java7
@@ -457,16 +494,16 @@ public class ZipArchiveOutputStream exte
*/
entry.entry.setSize(entry.bytesRead);
entry.entry.setCompressedSize(bytesWritten);
- entry.entry.setCrc(realCrc);
+ entry.entry.setCrc(crc);
def.reset();
} else if (raf == null) {
- if (entry.entry.getCrc() != realCrc) {
+ if (entry.entry.getCrc() != crc) {
throw new ZipException("bad CRC checksum for entry "
+ entry.entry.getName() + ": "
+ Long.toHexString(entry.entry.getCrc())
+ " instead of "
- + Long.toHexString(realCrc));
+ + Long.toHexString(crc));
}
if (entry.entry.getSize() != bytesWritten) {
@@ -479,7 +516,7 @@ public class ZipArchiveOutputStream exte
} else { /* method is STORED and we used RandomAccessFile */
entry.entry.setSize(bytesWritten);
entry.entry.setCompressedSize(bytesWritten);
- entry.entry.setCrc(realCrc);
+ entry.entry.setCrc(crc);
}
final boolean actuallyNeedsZip64 = effectiveMode == Zip64Mode.Always
@@ -489,10 +526,16 @@ public class ZipArchiveOutputStream exte
throw new Zip64RequiredException(Zip64RequiredException
.getEntryTooBigMessage(entry.entry));
}
+ return actuallyNeedsZip64;
+ }
- // If random access output, write the local file header containing
- // the correct CRC and compressed/uncompressed sizes
- if (raf != null) {
+ /**
+ * When using random access output, write the local file header
+ * and potentially the ZIP64 extra containing the correct CRC and
+ * compressed/uncompressed sizes.
+ */
+ private void rewriteSizesAndCrc(boolean actuallyNeedsZip64)
+ throws IOException {
long save = raf.getFilePointer();
raf.seek(entry.localDataStart);
@@ -534,17 +577,14 @@ public class ZipArchiveOutputStream exte
}
}
raf.seek(save);
- }
-
- writeDataDescriptor(entry.entry);
- entry = null;
}
/**
* {@inheritDoc}
* @throws ClassCastException if entry is not an instance of
ZipArchiveEntry
* @throws Zip64RequiredException if the entry's uncompressed or
- * compressed size is known to exceed 4 GByte
+ * compressed size is known to exceed 4 GByte and
+ * {@link #setUseZip64} is {@link Zip64Mode#Never}.
*/
@Override
public void putArchiveEntry(ArchiveEntry archiveEntry) throws IOException {
@@ -559,14 +599,57 @@ public class ZipArchiveOutputStream exte
entry = new CurrentEntry((ZipArchiveEntry) archiveEntry);
entries.add(entry.entry);
- if (entry.entry.getMethod() == -1) { // not specified
- entry.entry.setMethod(method);
+ setDefaults(entry.entry);
+
+ final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry);
+ validateSizeInformation(effectiveMode);
+
+ if (shouldAddZip64Extra(entry.entry, effectiveMode)) {
+
+ Zip64ExtendedInformationExtraField z64 =
getZip64Extra(entry.entry);
+
+ // just a placeholder, real data will be in data
+ // descriptor or inserted later via RandomAccessFile
+ ZipEightByteInteger size = ZipEightByteInteger.ZERO;
+ if (entry.entry.getMethod() == STORED
+ && entry.entry.getSize() != ArchiveEntry.SIZE_UNKNOWN) {
+ // actually, we already know the sizes
+ size = new ZipEightByteInteger(entry.entry.getSize());
+ }
+ z64.setSize(size);
+ z64.setCompressedSize(size);
+ entry.entry.setExtra();
+ }
+
+ if (entry.entry.getMethod() == DEFLATED && hasCompressionLevelChanged)
{
+ def.setLevel(level);
+ hasCompressionLevelChanged = false;
+ }
+ writeLocalFileHeader(entry.entry);
+ }
+
+ /**
+ * Provides default values for compression method and last
+ * modification time.
+ */
+ private void setDefaults(ZipArchiveEntry entry) {
+ if (entry.getMethod() == -1) { // not specified
+ entry.setMethod(method);
}
- if (entry.entry.getTime() == -1) { // not specified
- entry.entry.setTime(System.currentTimeMillis());
+ if (entry.getTime() == -1) { // not specified
+ entry.setTime(System.currentTimeMillis());
}
+ }
+ /**
+ * Throws an exception if the size is unknown for a stored entry
+ * that is written to a non-seekable output or the entry is too
+ * big to be written without Zip64 extra but the mode has been set
+ * to Never.
+ */
+ private void validateSizeInformation(Zip64Mode effectiveMode)
+ throws ZipException {
// Size/CRC not required if RandomAccessFile is used
if (entry.entry.getMethod() == STORED && raf == null) {
if (entry.entry.getSize() == ArchiveEntry.SIZE_UNKNOWN) {
@@ -581,49 +664,34 @@ public class ZipArchiveOutputStream exte
entry.entry.setCompressedSize(entry.entry.getSize());
}
- final Zip64Mode effectiveMode = getEffectiveZip64Mode(entry.entry);
-
if ((entry.entry.getSize() >= ZIP64_MAGIC
|| entry.entry.getCompressedSize() >= ZIP64_MAGIC)
&& effectiveMode == Zip64Mode.Never) {
throw new Zip64RequiredException(Zip64RequiredException
.getEntryTooBigMessage(entry.entry));
}
+ }
- // add a ZIP64 extended information extra field if
- // * mode is Always
- // * or we already know it is going to be needed
- // * or the size is unknown and we can ensure it won't hurt
- // other implementations if we add it (i.e. we can erase its
- // usage)
- if (effectiveMode == Zip64Mode.Always
- || entry.entry.getSize() >= ZIP64_MAGIC
- || entry.entry.getCompressedSize() >= ZIP64_MAGIC
- || (entry.entry.getSize() == ArchiveEntry.SIZE_UNKNOWN
- && raf != null
- && effectiveMode != Zip64Mode.Never)) {
-
- Zip64ExtendedInformationExtraField z64 =
getZip64Extra(entry.entry);
- if (entry.entry.getMethod() == STORED
- && entry.entry.getSize() != ArchiveEntry.SIZE_UNKNOWN) {
- ZipEightByteInteger size =
- new ZipEightByteInteger(entry.entry.getSize());
- z64.setSize(size);
- z64.setCompressedSize(size);
- } else {
- // just a placeholder, real data will be in data
- // descriptor or inserted later via RandomAccessFile
- z64.setSize(ZipEightByteInteger.ZERO);
- z64.setCompressedSize(ZipEightByteInteger.ZERO);
- }
- entry.entry.setExtra();
- }
-
- if (entry.entry.getMethod() == DEFLATED && hasCompressionLevelChanged)
{
- def.setLevel(level);
- hasCompressionLevelChanged = false;
- }
- writeLocalFileHeader(entry.entry);
+ /**
+ * Whether to add a Zip64 extended information extra field to the
+ * local file header.
+ *
+ * <p>Returns true if</p>
+ *
+ * <ul>
+ * <li>mode is Always</li>
+ * <li>or we already know it is going to be needed</li>
+ * <li>or the size is unknown and we can ensure it won't hurt
+ * other implementations if we add it (i.e. we can erase its
+ * usage)</li>
+ * </ul>
+ */
+ private boolean shouldAddZip64Extra(ZipArchiveEntry entry, Zip64Mode mode)
{
+ return mode == Zip64Mode.Always
+ || entry.getSize() >= ZIP64_MAGIC
+ || entry.getCompressedSize() >= ZIP64_MAGIC
+ || (entry.getSize() == ArchiveEntry.SIZE_UNKNOWN
+ && raf != null && mode != Zip64Mode.Never);
}
/**
@@ -689,6 +757,20 @@ public class ZipArchiveOutputStream exte
ZipUtil.checkRequestedFeatures(entry.entry);
entry.hasWritten = true;
if (entry.entry.getMethod() == DEFLATED) {
+ writeDeflated(b, offset, length);
+ } else {
+ writeOut(b, offset, length);
+ written += length;
+ }
+ crc.update(b, offset, length);
+ count(length);
+ }
+
+ /**
+ * write implementation for DEFLATED entries.
+ */
+ private void writeDeflated(byte[]b, int offset, int length)
+ throws IOException {
if (length > 0 && !def.finished()) {
entry.bytesRead += length;
if (length <= DEFLATER_BLOCK_SIZE) {
@@ -708,12 +790,6 @@ public class ZipArchiveOutputStream exte
}
}
}
- } else {
- writeOut(b, offset, length);
- written += length;
- }
- crc.update(b, offset, length);
- count(length);
}
/**
@@ -723,19 +799,14 @@ public class ZipArchiveOutputStream exte
* @exception IOException if an I/O error occurs.
* @throws Zip64RequiredException if the archive's size exceeds 4
* GByte or there are more than 65535 entries inside the archive
+ * and {@link #setUseZip64} is {@link Zip64Mode#Never}.
*/
@Override
public void close() throws IOException {
if (!finished) {
finish();
}
-
- if (raf != null) {
- raf.close();
- }
- if (out != null) {
- out.close();
- }
+ destroy();
}
/**
@@ -802,30 +873,7 @@ public class ZipArchiveOutputStream exte
ByteBuffer name = getName(ze);
if (createUnicodeExtraFields != UnicodeExtraFieldPolicy.NEVER) {
-
- if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS
- || !encodable) {
- ze.addExtraField(new UnicodePathExtraField(ze.getName(),
- name.array(),
- name.arrayOffset(),
- name.limit()));
- }
-
- String comm = ze.getComment();
- if (comm != null && !"".equals(comm)) {
-
- boolean commentEncodable = zipEncoding.canEncode(comm);
-
- if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS
- || !commentEncodable) {
- ByteBuffer commentB = getEntryEncoding(ze).encode(comm);
- ze.addExtraField(new UnicodeCommentExtraField(comm,
-
commentB.array(),
-
commentB.arrayOffset(),
-
commentB.limit())
- );
- }
- }
+ addUnicodeExtraFields(ze, encodable, name);
}
offsets.put(ze, Long.valueOf(written));
@@ -900,6 +948,39 @@ public class ZipArchiveOutputStream exte
}
/**
+ * Adds UnicodeExtra fields for name and file comment if mode is
+ * ALWAYS or the data cannot be encoded using the configured
+ * encoding.
+ */
+ private void addUnicodeExtraFields(ZipArchiveEntry ze, boolean encodable,
+ ByteBuffer name)
+ throws IOException {
+ if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS
+ || !encodable) {
+ ze.addExtraField(new UnicodePathExtraField(ze.getName(),
+ name.array(),
+ name.arrayOffset(),
+ name.limit()));
+ }
+
+ String comm = ze.getComment();
+ if (comm != null && !"".equals(comm)) {
+
+ boolean commentEncodable = zipEncoding.canEncode(comm);
+
+ if (createUnicodeExtraFields == UnicodeExtraFieldPolicy.ALWAYS
+ || !commentEncodable) {
+ ByteBuffer commentB = getEntryEncoding(ze).encode(comm);
+ ze.addExtraField(new UnicodeCommentExtraField(comm,
+
commentB.array(),
+
commentB.arrayOffset(),
+
commentB.limit())
+ );
+ }
+ }
+ }
+
+ /**
* Writes the data descriptor entry.
* @param ze the entry to write
* @throws IOException on error
@@ -927,7 +1008,8 @@ public class ZipArchiveOutputStream exte
* @param ze the entry to write
* @throws IOException on error
* @throws Zip64RequiredException if the archive's size exceeds 4
- * GByte
+ * GByte and {@link #setUseZip64} is {@link
+ * Zip64Mode#Never}.
*/
protected void writeCentralFileHeader(ZipArchiveEntry ze) throws
IOException {
writeOut(CFH_SIG);
@@ -946,27 +1028,7 @@ public class ZipArchiveOutputStream exte
.ARCHIVE_TOO_BIG_MESSAGE);
}
- if (needsZip64Extra) {
- Zip64ExtendedInformationExtraField z64 = getZip64Extra(ze);
- if (ze.getCompressedSize() >= ZIP64_MAGIC
- || ze.getSize() >= ZIP64_MAGIC) {
- z64.setCompressedSize(new
ZipEightByteInteger(ze.getCompressedSize()));
- z64.setSize(new ZipEightByteInteger(ze.getSize()));
- } else {
- // reset value that may have been set for LFH
- z64.setCompressedSize(null);
- z64.setSize(null);
- }
- if (lfhOffset >= ZIP64_MAGIC) {
- z64.setRelativeHeaderOffset(new
ZipEightByteInteger(lfhOffset));
- }
- ze.setExtra();
- } else if (hasZip64Extra(ze)) {
- // added to LFH but not really needed, probably because of
- // Zip64Mode.Always
- ze.removeExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
- ze.setExtra();
- }
+ handleZip64Extra(ze, lfhOffset, needsZip64Extra);
// version made by
// CheckStyle:MagicNumber OFF
@@ -1058,10 +1120,41 @@ public class ZipArchiveOutputStream exte
}
/**
+ * If the entry needs Zip64 extra information inside the central
+ * director then configure its data, otherwise remove it if one is
+ * present.
+ */
+ private void handleZip64Extra(ZipArchiveEntry ze, long lfhOffset,
+ boolean needsZip64Extra) {
+ if (needsZip64Extra) {
+ Zip64ExtendedInformationExtraField z64 = getZip64Extra(ze);
+ if (ze.getCompressedSize() >= ZIP64_MAGIC
+ || ze.getSize() >= ZIP64_MAGIC) {
+ z64.setCompressedSize(new
ZipEightByteInteger(ze.getCompressedSize()));
+ z64.setSize(new ZipEightByteInteger(ze.getSize()));
+ } else {
+ // reset value that may have been set for LFH
+ z64.setCompressedSize(null);
+ z64.setSize(null);
+ }
+ if (lfhOffset >= ZIP64_MAGIC) {
+ z64.setRelativeHeaderOffset(new
ZipEightByteInteger(lfhOffset));
+ }
+ ze.setExtra();
+ } else if (hasZip64Extra(ze)) {
+ // added to LFH but not really needed, probably because of
+ // Zip64Mode.Always
+ ze.removeExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
+ ze.setExtra();
+ }
+ }
+
+ /**
* Writes the "End of central dir record".
* @throws IOException on error
* @throws Zip64RequiredException if the archive's size exceeds 4
* GByte or there are more than 65535 entries inside the archive
+ * and {@link #setUseZip64} is {@link Zip64Mode#Never}.
*/
protected void writeCentralDirectoryEnd() throws IOException {
writeOut(EOCD_SIG);
Modified:
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
URL:
http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java?rev=1156621&r1=1156620&r2=1156621&view=diff
==============================================================================
---
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
(original)
+++
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
Thu Aug 11 14:08:54 2011
@@ -51,7 +51,9 @@ import static org.apache.commons.compres
* <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
* have to reimplement all methods anyway. Like
* <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
- * covers and supports compressed and uncompressed entries.</p>
+ * covers and supports compressed and uncompressed entries. It also
+ * transparently supports Zip64 extensions and thus individual entries
+ * and archives larger than 4 GB or with more than 65536 entries.</p>
*
* <p>The method signatures mimic the ones of
* <code>java.util.zip.ZipFile</code>, with a couple of exceptions:
@@ -377,6 +379,9 @@ public class ZipFile {
/* external file attributes */ + WORD
/* relative offset of local header */ + WORD;
+ private static final long CFH_SIG =
+ ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
+
/**
* Reads the central directory of the given archive and populates
* the internal tables with ZipArchiveEntry instances.
@@ -395,17 +400,36 @@ public class ZipFile {
positionAtCentralDirectory();
- byte[] cfh = new byte[CFH_LEN];
-
byte[] signatureBytes = new byte[WORD];
archive.readFully(signatureBytes);
long sig = ZipLong.getValue(signatureBytes);
- final long cfhSig = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
- if (sig != cfhSig && startsWithLocalFileHeader()) {
+
+ if (sig != CFH_SIG && startsWithLocalFileHeader()) {
throw new IOException("central directory is empty, can't expand"
+ " corrupt archive.");
}
- while (sig == cfhSig) {
+
+ while (sig == CFH_SIG) {
+ readCentralDirectoryEntry(noUTF8Flag);
+ archive.readFully(signatureBytes);
+ sig = ZipLong.getValue(signatureBytes);
+ }
+ return noUTF8Flag;
+ }
+
+ /**
+ * Reads an individual entry of the central directory, creates a
+ * ZipArchiveEntry from it and adds it to the global maps.
+ *
+ * @param noUTF8Flag map used to collect entries that don't have
+ * their UTF-8 flag set and whose name will be set by data read
+ * from the local file header later. The current entry may be
+ * added to this map.
+ */
+ private void
+ readCentralDirectoryEntry(Map<ZipArchiveEntry, NameAndComment>
noUTF8Flag) throws IOException {
+ byte[] cfh = new byte[CFH_LEN];
+
archive.readFully(cfh);
int off = 0;
ZipArchiveEntry ze = new ZipArchiveEntry();
@@ -427,9 +451,6 @@ public class ZipFile {
ze.setMethod(ZipShort.getValue(cfh, off));
off += SHORT;
- // FIXME this is actually not very cpu cycles friendly as we are
converting from
- // dos to java while the underlying Sun implementation will convert
- // from java to dos time for internal storage...
long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfh, off));
ze.setTime(time);
off += WORD;
@@ -477,6 +498,33 @@ public class ZipFile {
archive.readFully(cdExtraData);
ze.setCentralDirectoryExtra(cdExtraData);
+ setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);
+
+ byte[] comment = new byte[commentLen];
+ archive.readFully(comment);
+ ze.setComment(entryEncoding.decode(comment));
+
+ if (!hasUTF8Flag && useUnicodeExtraFields) {
+ noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
+ }
+ }
+
+ /**
+ * If the entry holds a Zip64 extended information extra field,
+ * read sizes from there if the entry's sizes are set to
+ * 0xFFFFFFFF, do the same for the offset of the local file
+ * header.
+ *
+ * <p>Ensures the Zip64 extra either knows both compressed and
+ * uncompressed size or neither of both as the internal logic in
+ * ExtraFieldUtils forces the field to create local header data
+ * even if they are never used - and here a field with only one
+ * size would be invalid.</p>
+ */
+ private void setSizesAndOffsetFromZip64Extra(ZipArchiveEntry ze,
+ OffsetEntry offset,
+ int diskStart)
+ throws IOException {
Zip64ExtendedInformationExtraField z64 =
(Zip64ExtendedInformationExtraField)
ze.getExtraField(Zip64ExtendedInformationExtraField
@@ -492,10 +540,6 @@ public class ZipFile {
hasRelativeHeaderOffset,
diskStart ==
ZIP64_MAGIC_SHORT);
- // read ZIP64 values into entry.
- // ensure ZIP64 field either knows no or both size
- // values so it can create valid local header extra data
-
if (hasUncompressedSize) {
ze.setSize(z64.getSize().getLongValue());
} else if (hasCompressedSize) {
@@ -514,19 +558,6 @@ public class ZipFile {
z64.getRelativeHeaderOffset().getLongValue();
}
}
-
- byte[] comment = new byte[commentLen];
- archive.readFully(comment);
- ze.setComment(entryEncoding.decode(comment));
-
- archive.readFully(signatureBytes);
- sig = ZipLong.getValue(signatureBytes);
-
- if (!hasUTF8Flag && useUnicodeExtraFields) {
- noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
- }
- }
- return noUTF8Flag;
}
/**
@@ -637,6 +668,18 @@ public class ZipFile {
// not a ZIP64 archive
positionAtCentralDirectory32();
} else {
+ positionAtCentralDirectory64();
+ }
+ }
+
+ /**
+ * Parses the "Zip64 end of central directory locator",
+ * finds the "Zip64 end of central directory record" using the
+ * parsed information, parses that and positions the stream at the
+ * first central directory record.
+ */
+ private void positionAtCentralDirectory64()
+ throws IOException {
archive.skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET);
byte[] zip64EocdOffset = new byte[DWORD];
archive.readFully(zip64EocdOffset);
@@ -656,7 +699,6 @@ public class ZipFile {
byte[] cfdOffset = new byte[DWORD];
archive.readFully(cfdOffset);
archive.seek(ZipEightByteInteger.getLongValue(cfdOffset));
- }
}
/**