Hi, ZipEntry.getEntryPos currently has a double loop which retries search after a failed lookup by appending a '/' to the name/hash. This means that any miss needs to query the hash table twice.
The following patch updates hashN to truncate any '/' at the end of an entry name. This ensures that the hash of META-INF and META-INF/ will always collide and the double query loop is no longer needed. I'm seeing ~35% time saving on misses and a slight improvement on hits. Removing the double loop also improves readability. diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index 274e8788d1..fd6a4e276c 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -1266,6 +1266,9 @@ public class ZipFile implements ZipConstants, Closeable { } private static final int hashN(byte[] a, int off, int len) { + if(a[off + len -1] == '/') { + len--; + } int h = 1; while (len-- > 0) { h = 31 * h + a[off++]; @@ -1493,48 +1496,31 @@ public class ZipFile implements ZipConstants, Closeable { int hsh = hashN(name, 0, name.length); int idx = table[(hsh & 0x7fffffff) % tablelen]; /* - * This while loop is an optimization where a double lookup - * for name and name+/ is being performed. The name char - * array has enough room at the end to try again with a - * slash appended if the first table lookup does not succeed. + * Search down the target hash chain for a entry whose + * 32 bit hash matches the hashed name. */ - while (true) { - /* - * Search down the target hash chain for a entry whose - * 32 bit hash matches the hashed name. - */ - while (idx != ZIP_ENDCHAIN) { - if (getEntryHash(idx) == hsh) { - // The CEN name must match the specfied one - int pos = getEntryPos(idx); - if (name.length == CENNAM(cen, pos)) { - boolean matched = true; - int nameoff = pos + CENHDR; - for (int i = 0; i < name.length; i++) { - if (name[i] != cen[nameoff++]) { - matched = false; - break; - } - } - if (matched) { - return pos; + while (idx != ZIP_ENDCHAIN) { + if (getEntryHash(idx) == hsh) { + // The CEN name must match the specfied one + int pos = getEntryPos(idx); + final int nlen = CENNAM(cen, pos); + if (name.length == nlen || (addSlash && name.length == nlen - 1 && cen[pos + CENHDR + nlen - 1] == '/')) { + boolean matched = true; + int nameoff = pos + CENHDR; + for (int i = 0; i < name.length; i++) { + if (name[i] != cen[nameoff++]) { + matched = false; + break; } - } + } + if (matched) { + return pos; + } } - idx = getEntryNext(idx); - } - /* If not addSlash, or slash is already there, we are done */ - if (!addSlash || name.length == 0 || name[name.length - 1] == '/') { - return -1; } - /* Add slash and try once more */ - name = Arrays.copyOf(name, name.length + 1); - name[name.length - 1] = '/'; - hsh = hash_append(hsh, (byte)'/'); - //idx = table[hsh % tablelen]; - idx = table[(hsh & 0x7fffffff) % tablelen]; - addSlash = false; + idx = getEntryNext(idx); } + return -1; } /**