This is an automated email from the ASF dual-hosted git repository. remm pushed a commit to branch 9.0.x in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/9.0.x by this push: new ec2369d Add a bloom filter to speed up archive lookup ec2369d is described below commit ec2369d4398f84bbc5f9a2fe338053439bd0b301 Author: remm <r...@apache.org> AuthorDate: Wed Sep 23 11:49:12 2020 +0200 Add a bloom filter to speed up archive lookup This can improve deployment speed of applications with a large number of JARs. Patch provided by Jatin Kamnani, with changes. PR #352 --- java/org/apache/catalina/Context.java | 15 +++ java/org/apache/catalina/core/StandardContext.java | 19 +++ .../apache/catalina/core/mbeans-descriptors.xml | 4 + .../org/apache/catalina/startup/FailedContext.java | 6 + .../webresources/AbstractArchiveResourceSet.java | 16 ++- .../apache/catalina/webresources/JarContents.java | 135 +++++++++++++++++++++ .../catalina/webresources/TestJarContents.java | 93 ++++++++++++++ test/org/apache/tomcat/unittest/TesterContext.java | 6 + webapps/docs/changelog.xml | 5 + webapps/docs/config/context.xml | 8 ++ 10 files changed, 305 insertions(+), 2 deletions(-) diff --git a/java/org/apache/catalina/Context.java b/java/org/apache/catalina/Context.java index 5aac225..a53b977 100644 --- a/java/org/apache/catalina/Context.java +++ b/java/org/apache/catalina/Context.java @@ -1919,4 +1919,19 @@ public interface Context extends Container, ContextBind { * otherwise {@code false} */ public boolean getCreateUploadTargets(); + + + /** + * @return <code>true</code> if the resources archive lookup will + * use a bloom filter. + */ + public boolean getUseBloomFilterForArchives(); + + /** + * Set bloom filter flag value. 
+ * + * @param useBloomFilterForArchives The new fast class path scan flag + */ + public void setUseBloomFilterForArchives(boolean useBloomFilterForArchives); + } diff --git a/java/org/apache/catalina/core/StandardContext.java b/java/org/apache/catalina/core/StandardContext.java index 06f7621..6db44d3 100644 --- a/java/org/apache/catalina/core/StandardContext.java +++ b/java/org/apache/catalina/core/StandardContext.java @@ -830,6 +830,8 @@ public class StandardContext extends ContainerBase private boolean parallelAnnotationScanning = false; + private boolean useBloomFilterForArchives = false; + // ----------------------------------------------------- Context Properties @Override @@ -1407,6 +1409,23 @@ public class StandardContext extends ContainerBase @Override + public boolean getUseBloomFilterForArchives() { + return this.useBloomFilterForArchives; + } + + + @Override + public void setUseBloomFilterForArchives(boolean useBloomFilterForArchives) { + + boolean oldUseBloomFilterForArchives = this.useBloomFilterForArchives; + this.useBloomFilterForArchives = useBloomFilterForArchives; + support.firePropertyChange("useBloomFilterForArchives", oldUseBloomFilterForArchives, + this.useBloomFilterForArchives); + + } + + + @Override public void setParallelAnnotationScanning(boolean parallelAnnotationScanning) { boolean oldParallelAnnotationScanning = this.parallelAnnotationScanning; diff --git a/java/org/apache/catalina/core/mbeans-descriptors.xml b/java/org/apache/catalina/core/mbeans-descriptors.xml index 50be99f..c02cff3 100644 --- a/java/org/apache/catalina/core/mbeans-descriptors.xml +++ b/java/org/apache/catalina/core/mbeans-descriptors.xml @@ -325,6 +325,10 @@ description="Unpack WAR property" type="boolean"/> + <attribute name="useBloomFilterForArchives" + description="Use a bloom filter for archives lookups" + type="boolean"/> + <attribute name="useHttpOnly" description="Indicates that session cookies should use HttpOnly" type="boolean"/> diff --git 
a/java/org/apache/catalina/startup/FailedContext.java b/java/org/apache/catalina/startup/FailedContext.java index 7919943..21e5a81 100644 --- a/java/org/apache/catalina/startup/FailedContext.java +++ b/java/org/apache/catalina/startup/FailedContext.java @@ -828,4 +828,10 @@ public class FailedContext extends LifecycleMBeanBase implements Context { @Override public void setParallelAnnotationScanning(boolean parallelAnnotationScanning) {} + @Override + public boolean getUseBloomFilterForArchives() { return false; } + + @Override + public void setUseBloomFilterForArchives(boolean useBloomFilterForArchives) {} + } \ No newline at end of file diff --git a/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java b/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java index 8473480..2236491 100644 --- a/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java +++ b/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java @@ -36,12 +36,11 @@ public abstract class AbstractArchiveResourceSet extends AbstractResourceSet { private URL baseUrl; private String baseUrlString; - private JarFile archive = null; protected Map<String,JarEntry> archiveEntries = null; protected final Object archiveLock = new Object(); private long archiveUseCount = 0; - + private JarContents jarContents; protected final void setBaseUrl(URL baseUrl) { this.baseUrl = baseUrl; @@ -212,6 +211,14 @@ public abstract class AbstractArchiveResourceSet extends AbstractResourceSet { WebResourceRoot root = getRoot(); /* + * If jarContents reports that this resource definitely does not contain + * the path, we can end this method and move on to the next jar. + */ + if (jarContents != null && !jarContents.mightContainResource(path, webAppMount)) { + return new EmptyResource(root, path); + } + + /* * Implementation notes * * The path parameter passed into this method always starts with '/'. 
@@ -305,6 +312,10 @@ public abstract class AbstractArchiveResourceSet extends AbstractResourceSet { synchronized (archiveLock) { if (archive == null) { archive = JreCompat.getInstance().jarFileNewInstance(getBase()); + WebResourceRoot root = getRoot(); + if ((root.getContext() != null) && root.getContext().getUseBloomFilterForArchives()) { + jarContents = new JarContents(archive); + } } archiveUseCount++; return archive; @@ -328,6 +339,7 @@ public abstract class AbstractArchiveResourceSet extends AbstractResourceSet { } archive = null; archiveEntries = null; + jarContents = null; } } } diff --git a/java/org/apache/catalina/webresources/JarContents.java b/java/org/apache/catalina/webresources/JarContents.java new file mode 100644 index 0000000..1642d4c --- /dev/null +++ b/java/org/apache/catalina/webresources/JarContents.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.catalina.webresources; + +import java.util.BitSet; +import java.util.Enumeration; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; + +/** + * This class represents the contents of a jar by determining whether a given + * resource <b>might</b> be in the cache, based on a bloom filter. 
This is not a + * general-purpose bloom filter because it contains logic to strip out + * characters from the beginning of the key. + * + * The hash methods are simple but good enough for this purpose. + */ +public final class JarContents { + private final BitSet bits1; + private final BitSet bits2; + /** + * Constant used by a typical hashing method. + */ + private static final int HASH_PRIME_1 = 31; + + /** + * Constant used by a typical hashing method. + */ + private static final int HASH_PRIME_2 = 17; + + /** + * Size of the fixed-length bit table. Larger reduces false positives, + * smaller saves memory. + */ + private static final int TABLE_SIZE = 2048; + + /** + * Parses the passed-in jar and populates the bit array. + * + * @param jar + */ + public JarContents(JarFile jar) { + Enumeration<JarEntry> entries = jar.entries(); + bits1 = new BitSet(TABLE_SIZE); + bits2 = new BitSet(TABLE_SIZE); + + // Enumerations. When will they update this API?! + while (entries.hasMoreElements()) { + JarEntry entry = entries.nextElement(); + String name = entry.getName(); + int startPos = 0; + + // If the path starts with a slash, that's not useful information. + // Skipping it increases the significance of our key by + // removing an insignificant character. + boolean precedingSlash = name.charAt(0) == '/'; + if (precedingSlash) { + startPos = 1; + } + + // Find the correct table slot + int pathHash1 = hashcode(name, startPos, HASH_PRIME_1); + int pathHash2 = hashcode(name, startPos, HASH_PRIME_2); + + bits1.set(pathHash1 % TABLE_SIZE); + bits2.set(pathHash2 % TABLE_SIZE); + } + } + + /** + * Simple hashcode of a portion of the string. Typically we would use + * substring, but memory and runtime speed are critical. + * + * @param content + * Wrapping String. + * @param startPos + * First character in the range. + * @return hashcode of the range. 
+ */ + private int hashcode(String content, int startPos, int hashPrime) { + int h = hashPrime/2; + int contentLength = content.length(); + for (int i = startPos; i < contentLength; i++) { + h = hashPrime * h + content.charAt(i); + } + + if (h < 0) { + h = h * -1; + } + return h; + } + + + /** + * Method that identifies whether a given path <b>MIGHT</b> be in this jar. + * Uses the Bloom filter mechanism. + * + * @param path + * Requested path. Sometimes starts with "/WEB-INF/classes". + * @param webappRoot + * The value of the webapp location, which can be stripped from + * the path. Typically is "/WEB-INF/classes". + * @return Whether the prefix of the path is known to be in this jar. + */ + public final boolean mightContainResource(String path, String webappRoot) { + int startPos = 0; + if (path.startsWith(webappRoot)) { + startPos = webappRoot.length(); + } + + if (path.charAt(startPos) == '/') { + // ignore leading slash + startPos++; + } + + // calculate the hash lazyly and return a boolean value for this path + return (bits1.get(hashcode(path, startPos, HASH_PRIME_1) % TABLE_SIZE) && + bits2.get(hashcode(path, startPos, HASH_PRIME_2) % TABLE_SIZE)); + } + +} \ No newline at end of file diff --git a/test/org/apache/catalina/webresources/TestJarContents.java b/test/org/apache/catalina/webresources/TestJarContents.java new file mode 100644 index 0000000..6d55281 --- /dev/null +++ b/test/org/apache/catalina/webresources/TestJarContents.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.catalina.webresources; + +import java.io.File; +import java.util.jar.JarFile; + +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.catalina.WebResourceSet; + +/** + * @author Kamnani, Jatin + */ +public class TestJarContents { + + private static File empty; + private static File jar; + private static TesterWebResourceRoot root; + private static WebResourceSet webResourceSet; + private static JarResourceSet test; + private static JarContents testJarContentsObject; + + @BeforeClass + public static void setup() { + try { + empty = new File("test/webresources/dir3"); + jar = new File("test/webresources/dir1.jar"); + + root = new TesterWebResourceRoot(); + + // Use empty dir for root of web app. 
+ webResourceSet = new DirResourceSet(root, "/", empty.getAbsolutePath(), "/"); + root.setMainResources(webResourceSet); + + // If this JAR was in a web application, this is equivalent to how it + // would be added + test = new JarResourceSet(root, "/", jar.getAbsolutePath(), "/META-INF/resources"); + test.setStaticOnly(true); + root.addJarResources(test); + + testJarContentsObject = new JarContents(new JarFile("test/webresources/dir1.jar")); + + } catch (Exception e) { + Assert.fail("Error happened while testing JarContents, " + e.getMessage()); + } + } + + @Test + public void testMightContainResource() { + Assert.assertTrue(testJarContentsObject.mightContainResource( + "/d1/d1-f1.txt", jar.getAbsolutePath())); + + Assert.assertTrue(testJarContentsObject.mightContainResource( + "d1/d1-f1.txt", jar.getAbsolutePath())); + + Assert.assertFalse(testJarContentsObject.mightContainResource( + "/d7/d1-f1.txt", jar.getAbsolutePath())); + + Assert.assertFalse(testJarContentsObject.mightContainResource( + "/", jar.getAbsolutePath())); + + Assert.assertFalse(testJarContentsObject.mightContainResource( + "/////", jar.getAbsolutePath())); + + } + + @Test(expected = StringIndexOutOfBoundsException.class) + public void testStringOutOfBoundExceptions() { + testJarContentsObject.mightContainResource("", jar.getAbsolutePath()); + } + + @Test(expected = NullPointerException.class) + public void testNullPointerExceptions() { + testJarContentsObject.mightContainResource(null, jar.getAbsolutePath()); + } +} diff --git a/test/org/apache/tomcat/unittest/TesterContext.java b/test/org/apache/tomcat/unittest/TesterContext.java index d2cfe78..4c67da9 100644 --- a/test/org/apache/tomcat/unittest/TesterContext.java +++ b/test/org/apache/tomcat/unittest/TesterContext.java @@ -1294,4 +1294,10 @@ public class TesterContext implements Context { @Override public void setParallelAnnotationScanning(boolean parallelAnnotationScanning) {} + @Override + public boolean getUseBloomFilterForArchives() { 
return false; } + + @Override + public void setUseBloomFilterForArchives(boolean useBloomFilterForArchives) {} + } diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index a2e7d97..d6e28f3 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -78,6 +78,11 @@ When performing an incremental build, ensure bdn does not create unwanted JPMS dependencies between embedded JARs. (markt) </fix> + <update> + Add a bloom filter to speed up archive lookup and improve deployment + speed of applications with a large number of JARs. Patch + provided by Jatin Kamnani. (remm) + </update> </changelog> </subsection> <subsection name="Coyote"> diff --git a/webapps/docs/config/context.xml b/webapps/docs/config/context.xml index ea30612..3816736 100644 --- a/webapps/docs/config/context.xml +++ b/webapps/docs/config/context.xml @@ -605,6 +605,14 @@ penalty.</p> </attribute> + <attribute name="useBloomFilterForArchives" required="false"> + <p>If this is <code>true</code> then a bloom filter will be used to + speed up archive lookups. This can be beneficial to the deployment + speed of web applications that contain a very large number of JARs.</p> + <p>If not specified, the default value of <code>false</code> will be + used.</p> + </attribute> + <attribute name="useHttpOnly" required="false"> <p>Should the HttpOnly flag be set on session cookies to prevent client side script from accessing the session ID? Defaults to --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org