Author: bodewig
Date: Thu Feb 26 13:15:14 2009
New Revision: 748133

URL: http://svn.apache.org/viewvc?rev=748133&view=rev
Log:
optionally use UnicodeExtraFields to set names and comments of entries when 
reading.  SANDBOX-176

Modified:
    
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
    
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
    
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java

Modified: 
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
URL: 
http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java?rev=748133&r1=748132&r2=748133&view=diff
==============================================================================
--- 
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
 (original)
+++ 
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
 Thu Feb 26 13:15:14 2009
@@ -184,8 +184,12 @@
      *                 <code>"UTF-8"</code> is supported in ZIP file
      *                 version <code>6.3</code> or later.
      */
-    static final String decodeName(byte[] name, String encoding) {
+    static final String decodeName(byte[] name, String encoding)
+        throws java.nio.charset.CharacterCodingException {
         Charset cs = Charset.forName(encoding);
-        return cs.decode(ByteBuffer.wrap(name)).toString();
+        return cs.newDecoder()
+            .onMalformedInput(CodingErrorAction.REPORT)
+            .onUnmappableCharacter(CodingErrorAction.REPORT)
+            .decode(ByteBuffer.wrap(name)).toString();
     }
 }

Modified: 
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
URL: 
http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java?rev=748133&r1=748132&r2=748133&view=diff
==============================================================================
--- 
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
 (original)
+++ 
commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
 Thu Feb 26 13:15:14 2009
@@ -22,12 +22,14 @@
 import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.CharacterCodingException;
 import java.util.Calendar;
 import java.util.Collections;
 import java.util.Date;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.zip.CRC32;
 import java.util.zip.Inflater;
 import java.util.zip.InflaterInputStream;
 import java.util.zip.ZipException;
@@ -101,6 +103,11 @@
     private RandomAccessFile archive;
 
     /**
+     * Whether to look for and use Unicode extra fields.
+     */
+    private final boolean useUnicodeExtraFields;
+
+    /**
      * Opens the given file for reading, assuming "UTF8" for file names.
      *
      * @param f the archive.
@@ -124,7 +131,7 @@
 
     /**
      * Opens the given file for reading, assuming the specified
-     * encoding for file names.
+     * encoding for file names and ignoring Unicode extra fields.
      *
      * @param name name of the archive.
      * @param encoding the encoding to use for file names, use null
@@ -133,12 +140,12 @@
      * @throws IOException if an error occurs while reading the file.
      */
     public ZipFile(String name, String encoding) throws IOException {
-        this(new File(name), encoding);
+        this(new File(name), encoding, false);
     }
 
     /**
      * Opens the given file for reading, assuming the specified
-     * encoding for file names.
+     * encoding for file names and ignoring Unicode extra fields.
      *
      * @param f the archive.
      * @param encoding the encoding to use for file names, use null
@@ -147,12 +154,30 @@
      * @throws IOException if an error occurs while reading the file.
      */
     public ZipFile(File f, String encoding) throws IOException {
+        this(f, encoding, false);
+    }
+
+    /**
+     * Opens the given file for reading, assuming the specified
+     * encoding for file names.
+     *
+     * @param f the archive.
+     * @param encoding the encoding to use for file names, use null
+     * for the platform's default encoding
+     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
+     * Extra Fields (if present) to set the file names.
+     *
+     * @throws IOException if an error occurs while reading the file.
+     */
+    public ZipFile(File f, String encoding, boolean useUnicodeExtraFields)
+        throws IOException {
         this.encoding = encoding;
+        this.useUnicodeExtraFields = useUnicodeExtraFields;
         archive = new RandomAccessFile(f, "r");
         boolean success = false;
         try {
-            populateFromCentralDirectory();
-            resolveLocalFileHeaderData();
+            Map entriesWithoutEFS = populateFromCentralDirectory();
+            resolveLocalFileHeaderData(entriesWithoutEFS);
             success = true;
         } finally {
             if (!success) {
@@ -269,9 +294,15 @@
      * <p>The ZipArchiveEntrys will know all data that can be obtained from
      * the central directory alone, but not the data that requires the
      * local file header or additional data to be read.</p>
+     *
+     * @return a Map&lt;ZipArchiveEntry, NameAndComment&gt; of zip
+     * entries that didn't have the language encoding flag set when
+     * read; empty unless Unicode extra fields are to be used.
      */
-    private void populateFromCentralDirectory()
+    private Map populateFromCentralDirectory()
         throws IOException {
+        HashMap noEFS = new HashMap();
+
         positionAtCentralDirectory();
 
         byte[] cfh = new byte[CFH_LEN];
@@ -296,10 +327,10 @@
             off += SHORT; // skip version info
 
             final int generalPurposeFlag = ZipShort.getValue(cfh, off);
-            final String entryEncoding = 
-                (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0
-                ? ZipArchiveOutputStream.UTF8
-                : encoding;
+            final boolean hasEFS = 
+                (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0;
+            final String entryEncoding =
+                hasEFS ? ZipArchiveOutputStream.UTF8 : encoding;
 
             off += SHORT;
 
@@ -367,7 +398,12 @@
 
             archive.readFully(signatureBytes);
             sig = ZipLong.getValue(signatureBytes);
+
+            if (!hasEFS && useUnicodeExtraFields) {
+                noEFS.put(ze, new NameAndComment(fileName, comment));
+            }
         }
+        return noEFS;
     }
 
     private static final int MIN_EOCD_SIZE =
@@ -462,7 +498,7 @@
      * <p>Also records the offsets for the data to read from the
      * entries.</p>
      */
-    private void resolveLocalFileHeaderData()
+    private void resolveLocalFileHeaderData(Map entriesWithoutEFS)
         throws IOException {
         Enumeration e = getEntries();
         while (e.hasMoreElements()) {
@@ -493,6 +529,12 @@
             */
             offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH
                                      + SHORT + SHORT + fileNameLen + 
extraFieldLen;
+
+            if (entriesWithoutEFS.containsKey(ze)) {
+                setNameAndCommentFromExtraFields(ze,
+                                                 (NameAndComment)
+                                                 entriesWithoutEFS.get(ze));
+            }
         }
     }
 
@@ -538,7 +580,11 @@
             return new String(bytes);
         } else {
             try {
-                return ZipEncodingHelper.decodeName(bytes, enc);
+                try {
+                    return ZipEncodingHelper.decodeName(bytes, enc);
+                } catch (CharacterCodingException ex) {
+                    throw new ZipException(ex.getMessage());
+                }
             } catch (java.nio.charset.UnsupportedCharsetException ex) {
                 // Java 1.4's NIO doesn't recognize a few names that
                 // String.getBytes does
@@ -568,6 +614,65 @@
     }
 
     /**
+     * If the entry has Unicode*ExtraFields and the CRCs of the
+     * names/comments match those of the extra fields, transfer the
+     * known Unicode values from the extra field.
+     */
+    private void setNameAndCommentFromExtraFields(ZipArchiveEntry ze,
+                                                  NameAndComment nc) {
+        UnicodePathExtraField name = (UnicodePathExtraField)
+            ze.getExtraField(UnicodePathExtraField.UPATH_ID);
+        String originalName = ze.getName();
+        String newName = getUnicodeStringIfOriginalMatches(name, nc.name);
+        if (newName != null && !originalName.equals(newName)) {
+            ze.setName(newName);
+            nameMap.remove(originalName);
+            nameMap.put(newName, ze);
+        }
+
+        if (nc.comment != null && nc.comment.length > 0) {
+            UnicodeCommentExtraField cmt = (UnicodeCommentExtraField)
+                ze.getExtraField(UnicodeCommentExtraField.UCOM_ID);
+            String newComment =
+                getUnicodeStringIfOriginalMatches(cmt, nc.comment);
+            if (newComment != null) {
+                ze.setComment(newComment);
+            }
+        }
+    }
+
+    /**
+     * If the stored CRC matches the one of the given name, return the
+     * Unicode name of the given field.
+     *
+     * <p>If the field is null or the CRCs don't match, return null
+     * instead.</p>
+     */
+    private String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField 
f,
+                                                     byte[] orig) {
+        if (f != null) {
+            CRC32 crc32 = new CRC32();
+            crc32.update(orig);
+            long origCRC32 = crc32.getValue();
+
+            if (origCRC32 == f.getNameCRC32()) {
+                try {
+                    return ZipEncodingHelper
+                        .decodeName(f.getUnicodeName(),
+                                    ZipArchiveOutputStream.UTF8);
+                } catch (CharacterCodingException ex) {
+                    // UTF-8 unsupported?  That should be impossible, so
+                    // the Unicode*ExtraField must contain some bad bytes.
+
+                    // TODO log this anywhere?
+                    return null;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
      * InputStream that delegates requests to the underlying
      * RandomAccessFile, making sure that only bytes from a certain
      * range can be read.
@@ -634,4 +739,12 @@
         }
     }
 
+    private static final class NameAndComment {
+        private final byte[] name;
+        private final byte[] comment;
+        private NameAndComment(byte[] name, byte[] comment) {
+            this.name = name;
+            this.comment = comment;
+        }
+    }
 }

Modified: 
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
URL: 
http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java?rev=748133&r1=748132&r2=748133&view=diff
==============================================================================
--- 
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
 (original)
+++ 
commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java
 Thu Feb 26 13:15:14 2009
@@ -99,6 +99,23 @@
         }
     }
 
+    public void testZipFileReadsUnicodeFields() throws IOException {
+        File file = File.createTempFile("unicode-test", ".zip");
+        ZipFile zf = null;
+        try {
+            createTestFile(file, US_ASCII, false, true);
+            zf = new ZipFile(file, US_ASCII, true);
+            assertNotNull(zf.getEntry(ASCII_TXT));
+            assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
+            assertNotNull(zf.getEntry(OIL_BARREL_TXT));
+        } finally {
+            ZipFile.closeQuietly(zf);
+            if (file.exists()) {
+                file.delete();
+            }
+        }
+    }
+
     private static void testFileRoundtrip(String encoding, boolean withEFS,
                                           boolean withExplicitUnicodeExtra)
         throws IOException {


Reply via email to