Claes, Since the ZIP index is already scanned for META-INF/ names in ZipFile.Source.initCEN, it might make sense to move version scanning there. This would also allow identifying version strings at the byte array level.
Here's a patch which implements this: diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java b/src/java.base/share/classes/java/util/jar/JarFile.java index 1ec0f5bdae..988cc837f0 100644 --- a/src/java.base/share/classes/java/util/jar/JarFile.java +++ b/src/java.base/share/classes/java/util/jar/JarFile.java @@ -49,6 +49,7 @@ import java.util.Locale; import java.util.NoSuchElementException; import java.util.Objects; import java.util.function.Function; +import java.util.stream.IntStream; import java.util.stream.Stream; import java.util.zip.ZipEntry; import java.util.zip.ZipException; @@ -161,7 +162,7 @@ public class JarFile extends ZipFile { private final Runtime.Version version; // current version private final int versionFeature; // version.feature() private boolean isMultiRelease; // is jar multi-release? - + private int[] versions; // which versions does the jar contain // indicates if Class-Path attribute present private boolean hasClassPathAttribute; // true if manifest checked for special attributes @@ -599,12 +600,13 @@ public class JarFile extends ZipFile { } private JarEntry getVersionedEntry(String name, JarEntry je) { - if (BASE_VERSION_FEATURE < versionFeature) { + int[] versions = this.versions; + if (BASE_VERSION_FEATURE < versionFeature && versions != null && versions.length > 0) { if (!name.startsWith(META_INF)) { // search for versioned entry - int v = versionFeature; - while (v > BASE_VERSION_FEATURE) { - JarFileEntry vje = getEntry0(META_INF_VERSIONS + v + "/" + name); + int v = versions.length - 1; + while (v >= 0) { + JarFileEntry vje = getEntry0(META_INF_VERSIONS + versions[v] + "/" + name); if (vje != null) { return vje.withBasename(name); } @@ -1016,9 +1018,16 @@ public class JarFile extends ZipFile { byte[] lbuf = new byte[512]; Attributes attr = new Attributes(); attr.read(new Manifest.FastInputStream( - new ByteArrayInputStream(b)), lbuf); - isMultiRelease = Boolean.parseBoolean( - attr.getValue(Attributes.Name.MULTI_RELEASE)); + new ByteArrayInputStream(b)), lbuf); + if(Boolean.parseBoolean( + attr.getValue(Attributes.Name.MULTI_RELEASE))) { + isMultiRelease = true; + versions = IntStream.of(JUZFA.getMetaInfVersions(this)) + .filter(v -> v >= BASE_VERSION_FEATURE && v <= versionFeature) + .sorted() + .toArray(); + } + } } } diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index 274e8788d1..25eeb57555 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -48,6 +48,7 @@ import java.util.Iterator; import java.util.Objects; import java.util.NoSuchElementException; import java.util.Set; +import java.util.HashSet; import java.util.Spliterator; import java.util.Spliterators; import java.util.WeakHashMap; @@ -1063,6 +1064,11 @@ public class ZipFile implements ZipConstants, Closeable { return zip.getMetaInfEntryNames(); } @Override + public int[] getMetaInfVersions(ZipFile zip) { + final int[] metaVersions = zip.res.zsrc.metaVersions; + return metaVersions == null ? new int[0] : metaVersions; + } + @Override public JarEntry getEntry(ZipFile zip, String name, Function<String, JarEntry> func) { return (JarEntry)zip.getEntry(name, func); @@ -1097,6 +1103,7 @@ public class ZipFile implements ZipConstants, Closeable { private byte[] comment; // zip file comment // list of meta entries in META-INF dir private int[] metanames; + private int[] metaVersions; // list of unique versions found in META-INF/versions/ private final boolean startsWithLoc; // true, if zip file starts with LOCSIG (usually true) // A Hashmap for all entries. @@ -1424,6 +1431,8 @@ public class ZipFile implements ZipConstants, Closeable { // list for all meta entries ArrayList<Integer> metanamesList = null; + // Set of all version numbers seen in META-INF/versions/ + Set<Integer> metaVersionsSet = null; // Iterate through the entries in the central directory int i = 0; @@ -1461,6 +1470,12 @@ public class ZipFile implements ZipConstants, Closeable { if (metanamesList == null) metanamesList = new ArrayList<>(4); metanamesList.add(pos); + int version = getMetaVersion(cen, pos + CENHDR + 9, nlen); + if(version != -1) { + if(metaVersionsSet == null) + metaVersionsSet = new HashSet<>(); + metaVersionsSet.add(version); + } } // skip ext and comment pos += (CENHDR + nlen + elen + clen); @@ -1473,6 +1488,13 @@ public class ZipFile implements ZipConstants, Closeable { metanames[j] = metanamesList.get(j); } } + if(metaVersionsSet != null) { + metaVersions = new int[metaVersionsSet.size()]; + int c = 0; + for (Integer version : metaVersionsSet) { + metaVersions[c++] = version; + } + } if (pos + ENDHDR != cen.length) { zerror("invalid CEN header (bad header size)"); } @@ -1556,6 +1578,49 @@ public class ZipFile implements ZipConstants, Closeable { && (name[off] ) == '/'; } + /* + * If bytes represents a non-directory name beginning + * with "versions/", and continuing with an integer (version) + * followed by a '/', then return the parsed version integer. + * Otherwise, return -1 + */ + private static int getMetaVersion(byte[] name, int off, int len) { + int nend = off+len; + if(! (len > 9 // "versions/".length() + && name[off + len - 1] != '/' // non-directory + && (name[off++] | 0x20) == 'v' + && (name[off++] | 0x20) == 'e' + && (name[off++] | 0x20) == 'r' + && (name[off++] | 0x20) == 's' + && (name[off++] | 0x20) == 'i' + && (name[off++] | 0x20) == 'o' + && (name[off++] | 0x20) == 'n' + && (name[off++] | 0x20) == 's' + && (name[off++] ) == '/')) { + return -1; + } + int vstart = off; + for(int i = vstart; i < nend; i++) { + final byte c = name[i]; + if(c != '/' && (c < '0' || c > '9')) { + return -1; + } + if(c == '/') { + int vlen = i - vstart; + + if(vlen < 1) { + return -1; + } + try { + return Integer.parseInt(new String(name, vstart, vlen, UTF_8.INSTANCE)); + } catch (NumberFormatException e) { + return -1; + } + } + } + return -1; + } + /** * Returns the number of CEN headers in a central directory. * Will not throw, even if the zip file is corrupt. diff --git a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java index 10187e2f50..0d808b9286 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaUtilZipFileAccess.java @@ -35,6 +35,7 @@ import java.util.zip.ZipFile; public interface JavaUtilZipFileAccess { public boolean startsWithLocHeader(ZipFile zip); public String[] getMetaInfEntryNames(ZipFile zip); + public int[] getMetaInfVersions(ZipFile zip); public JarEntry getEntry(ZipFile zip, String name, Function<String, JarEntry> func); public Enumeration<JarEntry> entries(ZipFile zip, Function<String, JarEntry> func); public Stream<JarEntry> stream(ZipFile zip, Function<String, JarEntry> func); On Sun, Apr 12, 2020 at 10:50 AM Eirik Bjørsnøs <eir...@gmail.com> wrote: > I think you could tune away a significant part of that up front cost by >> using JUZFA.entryNameStream(this) instead of >> this.stream().map(ZipEntry::getName). This will avoid expanding each >> entry into a JarEntry internally. Perhaps this gets the up-front >> overhead down to more acceptable levels..? >> > > I found JUZFA.getMetaInfEntryNames which made the up front scanning > cost evaporate. > > Should be fast for other jars as well, at least when META-INF/ is sparsely > populated. > > Here's an updated patch: > > diff --git a/src/java.base/share/classes/java/util/jar/JarFile.java > b/src/java.base/share/classes/java/util/jar/JarFile.java > index 1ec0f5bdae..95472604c0 100644 > --- a/src/java.base/share/classes/java/util/jar/JarFile.java > +++ b/src/java.base/share/classes/java/util/jar/JarFile.java > @@ -161,7 +161,7 @@ public class JarFile extends ZipFile { > private final Runtime.Version version; // current version > private final int versionFeature; // version.feature() > private boolean isMultiRelease; // is jar multi-release? > - > + private int[] versions; // which versions does the > jar contain > // indicates if Class-Path attribute present > private boolean hasClassPathAttribute; > // true if manifest checked for special attributes > @@ -599,12 +599,13 @@ public class JarFile extends ZipFile { > } > > private JarEntry getVersionedEntry(String name, JarEntry je) { > - if (BASE_VERSION_FEATURE < versionFeature) { > + int[] versions = this.versions; > + if (BASE_VERSION_FEATURE < versionFeature && versions != null && > versions.length > 0) { > if (!name.startsWith(META_INF)) { > // search for versioned entry > - int v = versionFeature; > - while (v > BASE_VERSION_FEATURE) { > - JarFileEntry vje = getEntry0(META_INF_VERSIONS + v + > "/" + name); > + int v = versions.length - 1; > + while (v >= 0) { > + JarFileEntry vje = getEntry0(META_INF_VERSIONS + > versions[v] + "/" + name); > if (vje != null) { > return vje.withBasename(name); > } > @@ -1016,9 +1017,18 @@ public class JarFile extends ZipFile { > byte[] lbuf = new byte[512]; > Attributes attr = new Attributes(); > attr.read(new Manifest.FastInputStream( > - new ByteArrayInputStream(b)), lbuf); > - isMultiRelease = Boolean.parseBoolean( > - attr.getValue(Attributes.Name.MULTI_RELEASE)); > + new ByteArrayInputStream(b)), lbuf); > + if(Boolean.parseBoolean( > + > attr.getValue(Attributes.Name.MULTI_RELEASE))) { > + isMultiRelease = true; > + versions = > Stream.of(JUZFA.getMetaInfEntryNames(this)) > + .mapToInt(this::parseVersion) > + .filter(v -> v != -1 && v >= > BASE_VERSION_FEATURE && v <= versionFeature) > + .distinct() > + .sorted() > + .toArray(); > + } > + > } > } > } > @@ -1026,6 +1036,27 @@ public class JarFile extends ZipFile { > } > } > > + /** > + * If {@code entryName} is a a versioned entry, parse and return the > version as an integer, otherwise return -1 > + */ > + private int parseVersion(String entryName) { > + if(!entryName.startsWith(META_INF_VERSIONS)) { > + return -1; > + } > + > + int separator = entryName.indexOf("/", > META_INF_VERSIONS.length()); > + > + if(separator == -1) { > + return -1; > + } > + > + try { > + return Integer.parseInt(entryName, > META_INF_VERSIONS.length(), separator, 10); > + } catch (NumberFormatException e) { > + return -1; > + } > + } > + > synchronized void ensureInitialization() { > try { > maybeInstantiateVerifier(); > > Eirik. > >