Author: bodewig Date: Thu Feb 26 13:15:14 2009 New Revision: 748133 URL: http://svn.apache.org/viewvc?rev=748133&view=rev Log: optionally use UnicodeExtraFields to set names and comments of entries when reading. SANDBOX-176
Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java?rev=748133&r1=748132&r2=748133&view=diff ============================================================================== --- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java (original) +++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java Thu Feb 26 13:15:14 2009 @@ -184,8 +184,12 @@ * <code>"UTF-8"</code> is supported in ZIP file * version <code>6.3</code> or later. */ - static final String decodeName(byte[] name, String encoding) { + static final String decodeName(byte[] name, String encoding) + throws java.nio.charset.CharacterCodingException { Charset cs = Charset.forName(encoding); - return cs.decode(ByteBuffer.wrap(name)).toString(); + return cs.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT) + .decode(ByteBuffer.wrap(name)).toString(); } } Modified: commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java?rev=748133&r1=748132&r2=748133&view=diff ============================================================================== --- commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java (original) +++ commons/sandbox/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java Thu Feb 26 13:15:14 2009 @@ -22,12 +22,14 @@ import java.io.InputStream; import java.io.RandomAccessFile; import java.io.UnsupportedEncodingException; +import java.nio.charset.CharacterCodingException; import java.util.Calendar; import java.util.Collections; import java.util.Date; import java.util.Enumeration; import java.util.HashMap; import java.util.Map; +import java.util.zip.CRC32; import java.util.zip.Inflater; import java.util.zip.InflaterInputStream; import java.util.zip.ZipException; @@ -101,6 +103,11 @@ private RandomAccessFile archive; /** + * Whether to look for and use Unicode extra fields. + */ + private final boolean useUnicodeExtraFields; + + /** * Opens the given file for reading, assuming "UTF8" for file names. * * @param f the archive. @@ -124,7 +131,7 @@ /** * Opens the given file for reading, assuming the specified - * encoding for file names. + * encoding for file names and ignoring unicode extra fields. * * @param name name of the archive. * @param encoding the encoding to use for file names, use null @@ -133,12 +140,12 @@ * @throws IOException if an error occurs while reading the file. */ public ZipFile(String name, String encoding) throws IOException { - this(new File(name), encoding); + this(new File(name), encoding, false); } /** * Opens the given file for reading, assuming the specified - * encoding for file names. + * encoding for file names and ignoring unicode extra fields. * * @param f the archive. * @param encoding the encoding to use for file names, use null @@ -147,12 +154,30 @@ * @throws IOException if an error occurs while reading the file. */ public ZipFile(File f, String encoding) throws IOException { + this(f, encoding, false); + } + + /** + * Opens the given file for reading, assuming the specified + * encoding for file names. + * + * @param f the archive. + * @param encoding the encoding to use for file names, use null + * for the platform's default encoding + * @param whether to use InfoZIP Unicode Extra Fields (if present) + * to set the file names. + * + * @throws IOException if an error occurs while reading the file. + */ + public ZipFile(File f, String encoding, boolean useUnicodeExtraFields) + throws IOException { this.encoding = encoding; + this.useUnicodeExtraFields = useUnicodeExtraFields; archive = new RandomAccessFile(f, "r"); boolean success = false; try { - populateFromCentralDirectory(); - resolveLocalFileHeaderData(); + Map entriesWithoutEFS = populateFromCentralDirectory(); + resolveLocalFileHeaderData(entriesWithoutEFS); success = true; } finally { if (!success) { @@ -269,9 +294,15 @@ * <p>The ZipArchiveEntrys will know all data that can be obtained from * the central directory alone, but not the data that requires the * local file header or additional data to be read.</p> + * + * @return a Map<ZipArchiveEntry, NameAndComment>> of + * zipentries that didn't have the language encoding flag set when + * read. */ - private void populateFromCentralDirectory() + private Map populateFromCentralDirectory() throws IOException { + HashMap noEFS = new HashMap(); + positionAtCentralDirectory(); byte[] cfh = new byte[CFH_LEN]; @@ -296,10 +327,10 @@ off += SHORT; // skip version info final int generalPurposeFlag = ZipShort.getValue(cfh, off); - final String entryEncoding = - (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0 - ? ZipArchiveOutputStream.UTF8 - : encoding; + final boolean hasEFS = + (generalPurposeFlag & ZipArchiveOutputStream.EFS_FLAG) != 0; + final String entryEncoding = + hasEFS ? ZipArchiveOutputStream.UTF8 : encoding; off += SHORT; @@ -367,7 +398,12 @@ archive.readFully(signatureBytes); sig = ZipLong.getValue(signatureBytes); + + if (!hasEFS && useUnicodeExtraFields) { + noEFS.put(ze, new NameAndComment(fileName, comment)); + } } + return noEFS; } private static final int MIN_EOCD_SIZE = @@ -462,7 +498,7 @@ * <p>Also records the offsets for the data to read from the * entries.</p> */ - private void resolveLocalFileHeaderData() + private void resolveLocalFileHeaderData(Map entriesWithoutEFS) throws IOException { Enumeration e = getEntries(); while (e.hasMoreElements()) { @@ -493,6 +529,12 @@ */ offsetEntry.dataOffset = offset + LFH_OFFSET_FOR_FILENAME_LENGTH + SHORT + SHORT + fileNameLen + extraFieldLen; + + if (entriesWithoutEFS.containsKey(ze)) { + setNameAndCommentFromExtraFields(ze, + (NameAndComment) + entriesWithoutEFS.get(ze)); + } } } @@ -538,7 +580,11 @@ return new String(bytes); } else { try { - return ZipEncodingHelper.decodeName(bytes, enc); + try { + return ZipEncodingHelper.decodeName(bytes, enc); + } catch (CharacterCodingException ex) { + throw new ZipException(ex.getMessage()); + } } catch (java.nio.charset.UnsupportedCharsetException ex) { // Java 1.4's NIO doesn't recognize a few names that // String.getBytes does @@ -568,6 +614,65 @@ } /** + * If the entry has Unicode*ExtraFields and the CRCs of the + * names/comments match those of the extra fields, transfer the + * known Unicode values from the extra field. + */ + private void setNameAndCommentFromExtraFields(ZipArchiveEntry ze, + NameAndComment nc) { + UnicodePathExtraField name = (UnicodePathExtraField) + ze.getExtraField(UnicodePathExtraField.UPATH_ID); + String originalName = ze.getName(); + String newName = getUnicodeStringIfOriginalMatches(name, nc.name); + if (newName != null && !originalName.equals(newName)) { + ze.setName(newName); + nameMap.remove(originalName); + nameMap.put(newName, ze); + } + + if (nc.comment != null && nc.comment.length > 0) { + UnicodeCommentExtraField cmt = (UnicodeCommentExtraField) + ze.getExtraField(UnicodeCommentExtraField.UCOM_ID); + String newComment = + getUnicodeStringIfOriginalMatches(cmt, nc.comment); + if (newComment != null) { + ze.setComment(newComment); + } + } + } + + /** + * If the stored CRC matches the one of the given name, return the + * Unicode name of the given field. + * + * <p>If the field is null or the CRCs don't match, return null + * instead.</p> + */ + private String getUnicodeStringIfOriginalMatches(AbstractUnicodeExtraField f, + byte[] orig) { + if (f != null) { + CRC32 crc32 = new CRC32(); + crc32.update(orig); + long origCRC32 = crc32.getValue(); + + if (origCRC32 == f.getNameCRC32()) { + try { + return ZipEncodingHelper + .decodeName(f.getUnicodeName(), + ZipArchiveOutputStream.UTF8); + } catch (CharacterCodingException ex) { + // UTF-8 unsupported? should be impossible the + // Unicode*ExtraField must contain some bad bytes + + // TODO log this anywhere? + return null; + } + } + } + return null; + } + + /** * InputStream that delegates requests to the underlying * RandomAccessFile, making sure that only bytes from a certain * range can be read. @@ -634,4 +739,12 @@ } } + private static final class NameAndComment { + private final byte[] name; + private final byte[] comment; + private NameAndComment(byte[] name, byte[] comment) { + this.name = name; + this.comment = comment; + } + } } Modified: commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java URL: http://svn.apache.org/viewvc/commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java?rev=748133&r1=748132&r2=748133&view=diff ============================================================================== --- commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java (original) +++ commons/sandbox/compress/trunk/src/test/java/org/apache/commons/compress/archivers/zip/UTF8ZipFilesTest.java Thu Feb 26 13:15:14 2009 @@ -99,6 +99,23 @@ } } + public void testZipFileReadsUnicodeFields() throws IOException { + File file = File.createTempFile("unicode-test", ".zip"); + ZipFile zf = null; + try { + createTestFile(file, US_ASCII, false, true); + zf = new ZipFile(file, US_ASCII, true); + assertNotNull(zf.getEntry(ASCII_TXT)); + assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT)); + assertNotNull(zf.getEntry(OIL_BARREL_TXT)); + } finally { + ZipFile.closeQuietly(zf); + if (file.exists()) { + file.delete(); + } + } + } + private static void testFileRoundtrip(String encoding, boolean withEFS, boolean withExplicitUnicodeExtra) throws IOException {