COMPRESS-327 read zip archives from arbitrary SeekableByteChannels

Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/d8fc27b4
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/d8fc27b4
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/d8fc27b4

Branch: refs/heads/master
Commit: d8fc27b40be0c71090fb11d47aba813ce87f3d8b
Parents: 94197a7
Author: Stefan Bodewig <bode...@apache.org>
Authored: Wed Oct 12 18:37:16 2016 +0200
Committer: Stefan Bodewig <bode...@apache.org>
Committed: Wed Oct 12 18:37:16 2016 +0200

----------------------------------------------------------------------
 .../commons/compress/archivers/zip/ZipFile.java | 181 +++++++++++++------
 1 file changed, 124 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/d8fc27b4/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java 
b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
index b38381d..fbe9087 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
@@ -23,11 +23,15 @@ import java.io.EOFException;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.StandardOpenOption;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Enumeration;
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -57,7 +61,7 @@ import static 
org.apache.commons.compress.archivers.zip.ZipConstants.ZIP64_MAGIC
  *
  * <p>It doesn't extend <code>java.util.zip.ZipFile</code> as it would
  * have to reimplement all methods anyway.  Like
- * <code>java.util.ZipFile</code>, it uses RandomAccessFile under the
+ * <code>java.util.ZipFile</code>, it uses SeekableByteChannel under the
  * covers and supports compressed and uncompressed entries.  As of
  * Apache Commons Compress 1.3 it also transparently supports Zip64
  * extensions and thus individual entries and archives larger than 4
@@ -125,7 +129,7 @@ public class ZipFile implements Closeable {
     /**
      * The actual data source.
      */
-    private final RandomAccessFile archive;
+    private final SeekableByteChannel archive;
 
     /**
      * Whether to look for and use Unicode extra fields.
@@ -142,6 +146,10 @@ public class ZipFile implements Closeable {
     private final byte[] WORD_BUF = new byte[WORD];
     private final byte[] CFH_BUF = new byte[CFH_LEN];
     private final byte[] SHORT_BUF = new byte[SHORT];
+    private final ByteBuffer DWORD_BBUF = ByteBuffer.wrap(DWORD_BUF);
+    private final ByteBuffer WORD_BBUF = ByteBuffer.wrap(WORD_BUF);
+    private final ByteBuffer CFH_BBUF = ByteBuffer.wrap(CFH_BUF);
+    private final ByteBuffer SHORT_BBUF = ByteBuffer.wrap(SHORT_BUF);
 
     /**
      * Opens the given file for reading, assuming "UTF8" for file names.
@@ -207,11 +215,38 @@ public class ZipFile implements Closeable {
      */
     public ZipFile(final File f, final String encoding, final boolean 
useUnicodeExtraFields)
         throws IOException {
-        this.archiveName = f.getAbsolutePath();
+        this(Files.newByteChannel(f.toPath(), 
EnumSet.of(StandardOpenOption.READ)),
+             f.getAbsolutePath(), encoding, useUnicodeExtraFields, true);
+    }
+
+    /**
+     * Opens the given channel for reading, assuming the specified
+     * encoding for file names.
+     *
+     * @param channel the archive.
+     * @param archiveName name of the archive, used for error messages only.
+     * @param encoding the encoding to use for file names, use null
+     * for the platform's default encoding
+     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
+     * Extra Fields (if present) to set the file names.
+     *
+     * @throws IOException if an error occurs while reading the file.
+     */
+    public ZipFile(final SeekableByteChannel channel, final String archiveName,
+                   final String encoding, final boolean useUnicodeExtraFields)
+        throws IOException {
+        this(channel, archiveName, encoding, useUnicodeExtraFields, false);
+    }
+
+    private ZipFile(final SeekableByteChannel channel, final String 
archiveName,
+                    final String encoding, final boolean useUnicodeExtraFields,
+                    final boolean closeOnError)
+        throws IOException {
+        this.archiveName = archiveName;
         this.encoding = encoding;
         this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
         this.useUnicodeExtraFields = useUnicodeExtraFields;
-        archive = new RandomAccessFile(f, "r");
+        archive = channel;
         boolean success = false;
         try {
             final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag =
@@ -220,7 +255,7 @@ public class ZipFile implements Closeable {
             success = true;
         } finally {
             closed = !success;
-            if (!success) {
+            if (!success && closeOnError) {
                 IOUtils.closeQuietly(archive);
             }
         }
@@ -541,7 +576,8 @@ public class ZipFile implements Closeable {
 
         positionAtCentralDirectory();
 
-        archive.readFully(WORD_BUF);
+        WORD_BBUF.rewind();
+        IOUtils.readFully(archive, WORD_BBUF);
         long sig = ZipLong.getValue(WORD_BUF);
 
         if (sig != CFH_SIG && startsWithLocalFileHeader()) {
@@ -551,7 +587,8 @@ public class ZipFile implements Closeable {
 
         while (sig == CFH_SIG) {
             readCentralDirectoryEntry(noUTF8Flag);
-            archive.readFully(WORD_BUF);
+            WORD_BBUF.rewind();
+            IOUtils.readFully(archive, WORD_BBUF);
             sig = ZipLong.getValue(WORD_BUF);
         }
         return noUTF8Flag;
@@ -569,7 +606,8 @@ public class ZipFile implements Closeable {
     private void
         readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> 
noUTF8Flag)
         throws IOException {
-        archive.readFully(CFH_BUF);
+        CFH_BBUF.rewind();
+        IOUtils.readFully(archive, CFH_BBUF);
         int off = 0;
         final OffsetEntry offset = new OffsetEntry();
         final Entry ze = new Entry(offset);
@@ -627,7 +665,7 @@ public class ZipFile implements Closeable {
         off += WORD;
 
         final byte[] fileName = new byte[fileNameLen];
-        archive.readFully(fileName);
+        IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
         ze.setName(entryEncoding.decode(fileName), fileName);
 
         // LFH offset,
@@ -636,13 +674,13 @@ public class ZipFile implements Closeable {
         entries.add(ze);
 
         final byte[] cdExtraData = new byte[extraLen];
-        archive.readFully(cdExtraData);
+        IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
         ze.setCentralDirectoryExtra(cdExtraData);
 
         setSizesAndOffsetFromZip64Extra(ze, offset, diskStart);
 
         final byte[] comment = new byte[commentLen];
-        archive.readFully(comment);
+        IOUtils.readFully(archive, ByteBuffer.wrap(comment));
         ze.setComment(entryEncoding.decode(comment));
 
         if (!hasUTF8Flag && useUnicodeExtraFields) {
@@ -801,10 +839,11 @@ public class ZipFile implements Closeable {
         positionAtEndOfCentralDirectoryRecord();
         boolean found = false;
         final boolean searchedForZip64EOCD =
-            archive.getFilePointer() > ZIP64_EOCDL_LENGTH;
+            archive.position() > ZIP64_EOCDL_LENGTH;
         if (searchedForZip64EOCD) {
-            archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH);
-            archive.readFully(WORD_BUF);
+            archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
+            WORD_BBUF.rewind();
+            IOUtils.readFully(archive, WORD_BBUF);
             found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG,
                                   WORD_BUF);
         }
@@ -832,17 +871,20 @@ public class ZipFile implements Closeable {
         throws IOException {
         skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET
                   - WORD /* signature has already been read */);
-        archive.readFully(DWORD_BUF);
-        archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
-        archive.readFully(WORD_BUF);
+        DWORD_BBUF.rewind();
+        IOUtils.readFully(archive, DWORD_BBUF);
+        archive.position(ZipEightByteInteger.getLongValue(DWORD_BUF));
+        WORD_BBUF.rewind();
+        IOUtils.readFully(archive, WORD_BBUF);
         if (!Arrays.equals(WORD_BUF, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
             throw new ZipException("archive's ZIP64 end of central "
                                    + "directory locator is corrupt.");
         }
         skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET
                   - WORD /* signature has already been read */);
-        archive.readFully(DWORD_BUF);
-        archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF));
+        DWORD_BBUF.rewind();
+        IOUtils.readFully(archive, DWORD_BBUF);
+        archive.position(ZipEightByteInteger.getLongValue(DWORD_BUF));
     }
 
     /**
@@ -855,8 +897,9 @@ public class ZipFile implements Closeable {
     private void positionAtCentralDirectory32()
         throws IOException {
         skipBytes(CFD_LOCATOR_OFFSET);
-        archive.readFully(WORD_BUF);
-        archive.seek(ZipLong.getValue(WORD_BUF));
+        WORD_BBUF.rewind();
+        IOUtils.readFully(archive, WORD_BBUF);
+        archive.position(ZipLong.getValue(WORD_BUF));
     }
 
     /**
@@ -881,22 +924,26 @@ public class ZipFile implements Closeable {
                                          final long maxDistanceFromEnd,
                                          final byte[] sig) throws IOException {
         boolean found = false;
-        long off = archive.length() - minDistanceFromEnd;
+        long off = archive.size() - minDistanceFromEnd;
         final long stopSearching =
-            Math.max(0L, archive.length() - maxDistanceFromEnd);
+            Math.max(0L, archive.size() - maxDistanceFromEnd);
         if (off >= 0) {
             for (; off >= stopSearching; off--) {
-                archive.seek(off);
-                int curr = archive.read();
-                if (curr == -1) {
+                archive.position(off);
+                try {
+                    WORD_BBUF.rewind();
+                    IOUtils.readFully(archive, WORD_BBUF);
+                    WORD_BBUF.flip();
+                } catch (EOFException ex) {
                     break;
                 }
+                int curr = WORD_BBUF.get();
                 if (curr == sig[POS_0]) {
-                    curr = archive.read();
+                    curr = WORD_BBUF.get();
                     if (curr == sig[POS_1]) {
-                        curr = archive.read();
+                        curr = WORD_BBUF.get();
                         if (curr == sig[POS_2]) {
-                            curr = archive.read();
+                            curr = WORD_BBUF.get();
                             if (curr == sig[POS_3]) {
                                 found = true;
                                 break;
@@ -907,7 +954,7 @@ public class ZipFile implements Closeable {
             }
         }
         if (found) {
-            archive.seek(off);
+            archive.position(off);
         }
         return found;
     }
@@ -917,14 +964,12 @@ public class ZipFile implements Closeable {
      * skipping failed.
      */ 
     private void skipBytes(final int count) throws IOException {
-        int totalSkipped = 0;
-        while (totalSkipped < count) {
-            final int skippedNow = archive.skipBytes(count - totalSkipped);
-            if (skippedNow <= 0) {
-                throw new EOFException();
-            }
-            totalSkipped += skippedNow;
+        long currentPosition = archive.position();
+        long newPosition = currentPosition + count;
+        if (newPosition > archive.size()) {
+            throw new EOFException();
         }
+        archive.position(newPosition);
     }
 
     /**
@@ -958,22 +1003,16 @@ public class ZipFile implements Closeable {
             final Entry ze = (Entry) zipArchiveEntry;
             final OffsetEntry offsetEntry = ze.getOffsetEntry();
             final long offset = offsetEntry.headerOffset;
-            archive.seek(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
-            archive.readFully(SHORT_BUF);
+            archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
+            SHORT_BBUF.rewind();
+            IOUtils.readFully(archive, SHORT_BBUF);
             final int fileNameLen = ZipShort.getValue(SHORT_BUF);
-            archive.readFully(SHORT_BUF);
+            SHORT_BBUF.rewind();
+            IOUtils.readFully(archive, SHORT_BBUF);
             final int extraFieldLen = ZipShort.getValue(SHORT_BUF);
-            int lenToSkip = fileNameLen;
-            while (lenToSkip > 0) {
-                final int skipped = archive.skipBytes(lenToSkip);
-                if (skipped <= 0) {
-                    throw new IOException("failed to skip file name in"
-                                          + " local file header");
-                }
-                lenToSkip -= skipped;
-            }
+            skipBytes(fileNameLen);
             final byte[] localExtraData = new byte[extraFieldLen];
-            archive.readFully(localExtraData);
+            IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
             ze.setExtra(localExtraData);
             offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
                 + SHORT + SHORT + fileNameLen + extraFieldLen;
@@ -999,8 +1038,9 @@ public class ZipFile implements Closeable {
      * it may be an empty archive.
      */
     private boolean startsWithLocalFileHeader() throws IOException {
-        archive.seek(0);
-        archive.readFully(WORD_BUF);
+        archive.position(0);
+        WORD_BBUF.rewind();
+        IOUtils.readFully(archive, WORD_BBUF);
         return Arrays.equals(WORD_BUF, ZipArchiveOutputStream.LFH_SIG);
     }
 
@@ -1010,6 +1050,8 @@ public class ZipFile implements Closeable {
      * range can be read.
      */
     private class BoundedInputStream extends InputStream {
+        private static final int MAX_BUF_LEN = 8192;
+        private final ByteBuffer buffer;
         private long remaining;
         private long loc;
         private boolean addDummyByte = false;
@@ -1017,6 +1059,11 @@ public class ZipFile implements Closeable {
         BoundedInputStream(final long start, final long remaining) {
             this.remaining = remaining;
             loc = start;
+            if (remaining < MAX_BUF_LEN && remaining > 0) {
+                buffer = ByteBuffer.allocate((int) remaining);
+            } else {
+                buffer = ByteBuffer.allocate(MAX_BUF_LEN);
+            }
         }
 
         @Override
@@ -1029,8 +1076,12 @@ public class ZipFile implements Closeable {
                 return -1;
             }
             synchronized (archive) {
-                archive.seek(loc++);
-                return archive.read();
+                archive.position(loc++);
+                int read = read(1);
+                if (read < 0) {
+                    return read;
+                }
+                return buffer.get() & 0xff;
             }
         }
 
@@ -1052,18 +1103,34 @@ public class ZipFile implements Closeable {
             if (len > remaining) {
                 len = (int) remaining;
             }
+            ByteBuffer buf;
             int ret = -1;
             synchronized (archive) {
-                archive.seek(loc);
-                ret = archive.read(b, off, len);
+                archive.position(loc);
+                if (len <= buffer.capacity()) {
+                    buf = buffer;
+                    ret = read(len);
+                } else {
+                    buf = ByteBuffer.allocate(len);
+                    ret = archive.read(buf);
+                    buf.flip();
+                }
             }
             if (ret > 0) {
+                buf.get(b, off, ret);
                 loc += ret;
                 remaining -= ret;
             }
             return ret;
         }
 
+        private int read(int len) throws IOException {
+            buffer.rewind().limit(len);
+            int read = archive.read(buffer);
+            buffer.flip();
+            return read;
+        }
+
         /**
          * Inflater needs an extra dummy byte for nowrap - see
          * Inflater's javadocs.

Reply via email to