This is an automated email from the ASF dual-hosted git repository.
remm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/master by this push:
new ce17ede Add a bloom filter to speed up archive lookup
new 99647b3 Merge branch 'master' of [email protected]:apache/tomcat.git
ce17ede is described below
commit ce17edeee6c7c1a5965be4e60a4d1ccff96fbc38
Author: remm <[email protected]>
AuthorDate: Wed Sep 23 11:49:12 2020 +0200
Add a bloom filter to speed up archive lookup
This can improve deployment speed of applications with a large number of
JARs.
Patch provided by Jatin Kamnani, with changes. PR #352
---
java/org/apache/catalina/Context.java | 13 ++
java/org/apache/catalina/core/StandardContext.java | 19 +++
.../apache/catalina/core/mbeans-descriptors.xml | 4 +
.../org/apache/catalina/startup/FailedContext.java | 6 +
.../webresources/AbstractArchiveResourceSet.java | 16 ++-
.../apache/catalina/webresources/JarContents.java | 135 +++++++++++++++++++++
.../catalina/webresources/TestJarContents.java | 93 ++++++++++++++
test/org/apache/tomcat/unittest/TesterContext.java | 6 +
webapps/docs/changelog.xml | 5 +
webapps/docs/config/context.xml | 8 ++
10 files changed, 303 insertions(+), 2 deletions(-)
diff --git a/java/org/apache/catalina/Context.java
b/java/org/apache/catalina/Context.java
index 4cf844c..275063b 100644
--- a/java/org/apache/catalina/Context.java
+++ b/java/org/apache/catalina/Context.java
@@ -1946,4 +1946,17 @@ public interface Context extends Container, ContextBind {
public void setDispatcherWrapsSameObject(boolean
dispatcherWrapsSameObject);
+ /**
+ * @return <code>true</code> if the resources archive lookup will
+ * use a bloom filter.
+ */
+ public boolean getUseBloomFilterForArchives();
+
+ /**
+ * Set bloom filter flag value.
+ *
+ * @param useBloomFilterForArchives The new bloom filter flag value
+ */
+ public void setUseBloomFilterForArchives(boolean
useBloomFilterForArchives);
+
}
diff --git a/java/org/apache/catalina/core/StandardContext.java
b/java/org/apache/catalina/core/StandardContext.java
index a34f14e..abb123f 100644
--- a/java/org/apache/catalina/core/StandardContext.java
+++ b/java/org/apache/catalina/core/StandardContext.java
@@ -835,6 +835,8 @@ public class StandardContext extends ContainerBase
private boolean parallelAnnotationScanning = false;
+ private boolean useBloomFilterForArchives = false;
+
// ----------------------------------------------------- Context Properties
@Override
@@ -1450,6 +1452,23 @@ public class StandardContext extends ContainerBase
@Override
+ public boolean getUseBloomFilterForArchives() {
+ return this.useBloomFilterForArchives;
+ }
+
+
+ @Override
+ public void setUseBloomFilterForArchives(boolean
useBloomFilterForArchives) {
+
+ boolean oldUseBloomFilterForArchives = this.useBloomFilterForArchives;
+ this.useBloomFilterForArchives = useBloomFilterForArchives;
+ support.firePropertyChange("useBloomFilterForArchives",
oldUseBloomFilterForArchives,
+ this.useBloomFilterForArchives);
+
+ }
+
+
+ @Override
public void setParallelAnnotationScanning(boolean
parallelAnnotationScanning) {
boolean oldParallelAnnotationScanning =
this.parallelAnnotationScanning;
diff --git a/java/org/apache/catalina/core/mbeans-descriptors.xml
b/java/org/apache/catalina/core/mbeans-descriptors.xml
index 50be99f..c02cff3 100644
--- a/java/org/apache/catalina/core/mbeans-descriptors.xml
+++ b/java/org/apache/catalina/core/mbeans-descriptors.xml
@@ -325,6 +325,10 @@
description="Unpack WAR property"
type="boolean"/>
+ <attribute name="useBloomFilterForArchives"
+ description="Use a bloom filter for archives lookups"
+ type="boolean"/>
+
<attribute name="useHttpOnly"
description="Indicates that session cookies should use HttpOnly"
type="boolean"/>
diff --git a/java/org/apache/catalina/startup/FailedContext.java
b/java/org/apache/catalina/startup/FailedContext.java
index 84b12f5..4fee683 100644
--- a/java/org/apache/catalina/startup/FailedContext.java
+++ b/java/org/apache/catalina/startup/FailedContext.java
@@ -836,4 +836,10 @@ public class FailedContext extends LifecycleMBeanBase
implements Context {
@Override
public void setParallelAnnotationScanning(boolean
parallelAnnotationScanning) {}
+ @Override
+ public boolean getUseBloomFilterForArchives() { return false; }
+
+ @Override
+ public void setUseBloomFilterForArchives(boolean
useBloomFilterForArchives) {}
+
}
\ No newline at end of file
diff --git
a/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java
b/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java
index 8473480..2236491 100644
--- a/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java
+++ b/java/org/apache/catalina/webresources/AbstractArchiveResourceSet.java
@@ -36,12 +36,11 @@ public abstract class AbstractArchiveResourceSet extends
AbstractResourceSet {
private URL baseUrl;
private String baseUrlString;
-
private JarFile archive = null;
protected Map<String,JarEntry> archiveEntries = null;
protected final Object archiveLock = new Object();
private long archiveUseCount = 0;
-
+ private JarContents jarContents;
protected final void setBaseUrl(URL baseUrl) {
this.baseUrl = baseUrl;
@@ -212,6 +211,14 @@ public abstract class AbstractArchiveResourceSet extends
AbstractResourceSet {
WebResourceRoot root = getRoot();
/*
+ * If jarContents reports that this resource definitely does not contain
+ * the path, we can end this method and move on to the next jar.
+ */
+ if (jarContents != null && !jarContents.mightContainResource(path,
webAppMount)) {
+ return new EmptyResource(root, path);
+ }
+
+ /*
* Implementation notes
*
* The path parameter passed into this method always starts with '/'.
@@ -305,6 +312,10 @@ public abstract class AbstractArchiveResourceSet extends
AbstractResourceSet {
synchronized (archiveLock) {
if (archive == null) {
archive =
JreCompat.getInstance().jarFileNewInstance(getBase());
+ WebResourceRoot root = getRoot();
+ if ((root.getContext() != null) &&
root.getContext().getUseBloomFilterForArchives()) {
+ jarContents = new JarContents(archive);
+ }
}
archiveUseCount++;
return archive;
@@ -328,6 +339,7 @@ public abstract class AbstractArchiveResourceSet extends
AbstractResourceSet {
}
archive = null;
archiveEntries = null;
+ jarContents = null;
}
}
}
diff --git a/java/org/apache/catalina/webresources/JarContents.java
b/java/org/apache/catalina/webresources/JarContents.java
new file mode 100644
index 0000000..1642d4c
--- /dev/null
+++ b/java/org/apache/catalina/webresources/JarContents.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.catalina.webresources;
+
+import java.util.BitSet;
+import java.util.Enumeration;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+/**
+ * This class represents the contents of a jar by determining whether a given
+ * resource <b>might</b> be in the jar, based on a bloom filter. This is not a
+ * general-purpose bloom filter because it contains logic to strip out
+ * characters from the beginning of the key.
+ *
+ * The hash methods are simple but good enough for this purpose.
+ */
+public final class JarContents {
+ private final BitSet bits1;
+ private final BitSet bits2;
+ /**
+ * Constant used by a typical hashing method.
+ */
+ private static final int HASH_PRIME_1 = 31;
+
+ /**
+ * Constant used by a typical hashing method.
+ */
+ private static final int HASH_PRIME_2 = 17;
+
+ /**
+ * Size of the fixed-length bit table. Larger reduces false positives,
+ * smaller saves memory.
+ */
+ private static final int TABLE_SIZE = 2048;
+
+ /**
+ * Parses the passed-in jar and populates the bit array.
+ *
+ * @param jar
+ */
+ public JarContents(JarFile jar) {
+ Enumeration<JarEntry> entries = jar.entries();
+ bits1 = new BitSet(TABLE_SIZE);
+ bits2 = new BitSet(TABLE_SIZE);
+
+ // Enumerations. When will they update this API?!
+ while (entries.hasMoreElements()) {
+ JarEntry entry = entries.nextElement();
+ String name = entry.getName();
+ int startPos = 0;
+
+ // If the path starts with a slash, that's not useful information.
+ // Skipping it increases the significance of our key by
+ // removing an insignificant character.
+ boolean precedingSlash = name.charAt(0) == '/';
+ if (precedingSlash) {
+ startPos = 1;
+ }
+
+ // Find the correct table slot
+ int pathHash1 = hashcode(name, startPos, HASH_PRIME_1);
+ int pathHash2 = hashcode(name, startPos, HASH_PRIME_2);
+
+ bits1.set(pathHash1 % TABLE_SIZE);
+ bits2.set(pathHash2 % TABLE_SIZE);
+ }
+ }
+
+ /**
+ * Simple hashcode of a portion of the string. Typically we would use
+ * substring, but memory and runtime speed are critical.
+ *
+ * @param content
+ * Wrapping String.
+ * @param startPos
+ * First character in the range.
+ * @return hashcode of the range.
+ */
+ private int hashcode(String content, int startPos, int hashPrime) {
+ int h = hashPrime/2;
+ int contentLength = content.length();
+ for (int i = startPos; i < contentLength; i++) {
+ h = hashPrime * h + content.charAt(i);
+ }
+
+ if (h < 0) {
+ h = h * -1;
+ }
+ return h;
+ }
+
+
+ /**
+ * Method that identifies whether a given path <b>MIGHT</b> be in this jar.
+ * Uses the Bloom filter mechanism.
+ *
+ * @param path
+ * Requested path. Sometimes starts with "/WEB-INF/classes".
+ * @param webappRoot
+ * The value of the webapp location, which can be stripped from
+ * the path. Typically is "/WEB-INF/classes".
+ * @return Whether the prefix of the path is known to be in this jar.
+ */
+ public final boolean mightContainResource(String path, String webappRoot) {
+ int startPos = 0;
+ if (path.startsWith(webappRoot)) {
+ startPos = webappRoot.length();
+ }
+
+ if (path.charAt(startPos) == '/') {
+ // ignore leading slash
+ startPos++;
+ }
+
+ // calculate the hash lazily and return a boolean value for this path
+ return (bits1.get(hashcode(path, startPos, HASH_PRIME_1) % TABLE_SIZE)
&&
+ bits2.get(hashcode(path, startPos, HASH_PRIME_2) %
TABLE_SIZE));
+ }
+
+}
\ No newline at end of file
diff --git a/test/org/apache/catalina/webresources/TestJarContents.java
b/test/org/apache/catalina/webresources/TestJarContents.java
new file mode 100644
index 0000000..6d55281
--- /dev/null
+++ b/test/org/apache/catalina/webresources/TestJarContents.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.catalina.webresources;
+
+import java.io.File;
+import java.util.jar.JarFile;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import org.apache.catalina.WebResourceSet;
+
+/**
+ * @author Kamnani, Jatin
+ */
+public class TestJarContents {
+
+ private static File empty;
+ private static File jar;
+ private static TesterWebResourceRoot root;
+ private static WebResourceSet webResourceSet;
+ private static JarResourceSet test;
+ private static JarContents testJarContentsObject;
+
+ @BeforeClass
+ public static void setup() {
+ try {
+ empty = new File("test/webresources/dir3");
+ jar = new File("test/webresources/dir1.jar");
+
+ root = new TesterWebResourceRoot();
+
+ // Use empty dir for root of web app.
+ webResourceSet = new DirResourceSet(root, "/",
empty.getAbsolutePath(), "/");
+ root.setMainResources(webResourceSet);
+
+ // If this JAR was in a web application, this is equivalent to how it
+ // would be added
+ test = new JarResourceSet(root, "/", jar.getAbsolutePath(),
"/META-INF/resources");
+ test.setStaticOnly(true);
+ root.addJarResources(test);
+
+ testJarContentsObject = new JarContents(new
JarFile("test/webresources/dir1.jar"));
+
+ } catch (Exception e) {
+ Assert.fail("Error happened while testing JarContents, " +
e.getMessage());
+ }
+ }
+
+ @Test
+ public void testMightContainResource() {
+ Assert.assertTrue(testJarContentsObject.mightContainResource(
+ "/d1/d1-f1.txt", jar.getAbsolutePath()));
+
+ Assert.assertTrue(testJarContentsObject.mightContainResource(
+ "d1/d1-f1.txt", jar.getAbsolutePath()));
+
+ Assert.assertFalse(testJarContentsObject.mightContainResource(
+ "/d7/d1-f1.txt", jar.getAbsolutePath()));
+
+ Assert.assertFalse(testJarContentsObject.mightContainResource(
+ "/", jar.getAbsolutePath()));
+
+ Assert.assertFalse(testJarContentsObject.mightContainResource(
+ "/////", jar.getAbsolutePath()));
+
+ }
+
+ @Test(expected = StringIndexOutOfBoundsException.class)
+ public void testStringOutOfBoundExceptions() {
+ testJarContentsObject.mightContainResource("", jar.getAbsolutePath());
+ }
+
+ @Test(expected = NullPointerException.class)
+ public void testNullPointerExceptions() {
+ testJarContentsObject.mightContainResource(null,
jar.getAbsolutePath());
+ }
+}
diff --git a/test/org/apache/tomcat/unittest/TesterContext.java
b/test/org/apache/tomcat/unittest/TesterContext.java
index 33b1355..0d0e65f 100644
--- a/test/org/apache/tomcat/unittest/TesterContext.java
+++ b/test/org/apache/tomcat/unittest/TesterContext.java
@@ -1295,4 +1295,10 @@ public class TesterContext implements Context {
@Override
public void setParallelAnnotationScanning(boolean
parallelAnnotationScanning) {}
+ @Override
+ public boolean getUseBloomFilterForArchives() { return false; }
+
+ @Override
+ public void setUseBloomFilterForArchives(boolean
useBloomFilterForArchives) {}
+
}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index d7e275b..0290bdc 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -79,6 +79,11 @@
When performing an incremental build, ensure bdn does not create
unwanted JPMS dependencies between embedded JARs. (markt)
</fix>
+ <update>
+ Add a bloom filter to speed up archive lookup and improve deployment
+ speed of applications with a large number of JARs. Patch
+ provided by Jatin Kamnani. (remm)
+ </update>
</changelog>
</subsection>
<subsection name="Coyote">
diff --git a/webapps/docs/config/context.xml b/webapps/docs/config/context.xml
index 2eaf349..5dfcbcf 100644
--- a/webapps/docs/config/context.xml
+++ b/webapps/docs/config/context.xml
@@ -635,6 +635,14 @@
penalty.</p>
</attribute>
+ <attribute name="useBloomFilterForArchives" required="false">
+ <p>If this is <code>true</code> then a bloom filter will be used to
+ speed up archive lookups. This can be beneficial to the deployment
+ speed of web applications that contain a very large number of JARs.</p>
+ <p>If not specified, the default value of <code>false</code> will be
+ used.</p>
+ </attribute>
+
<attribute name="useHttpOnly" required="false">
<p>Should the HttpOnly flag be set on session cookies to prevent client
side script from accessing the session ID? Defaults to
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]