FSchumacher commented on a change in pull request #352:
URL: https://github.com/apache/tomcat/pull/352#discussion_r489610355



##########
File path: java/org/apache/catalina/webresources/JarContents.java
##########
@@ -0,0 +1,122 @@
+package org.apache.catalina.webresources;
+
+import java.util.BitSet;
+import java.util.Enumeration;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+
+/**
+ * This class represents the contents of a jar by determining whether a given
+ * resource <b>might</b> be in the cache, based on a bloom filter. This is not 
a
+ * general-purpose bloom filter because it contains logic to strip out
+ * characters from the beginning of the key.
+ *
+ * The hash methods are simple but good enough for this purpose.
+ */
+public final class JarContents {
+    private final BitSet bits1;
+    private final BitSet bits2;
+    /**
+     * Constant used by a typical hashing method.
+     */
+    private static final int HASH_PRIME_1 = 31;
+
+    /**
+     * Constant used by a typical hashing method.
+     */
+    private static final int HASH_PRIME_2 = 17;
+
+    /**
+     * Size of the fixed-length bit table. Larger reduces false positives,
+     * smaller saves memory.
+     */
+    private static final int TABLE_SIZE = 2048;
+
+    /**
+     * Parses the passed-in jar and populates the bit array.
+     *
+     * @param jar
+     */
+    public JarContents(JarFile jar) {
+        Enumeration<JarEntry> entries = jar.entries();
+        bits1 = new BitSet(TABLE_SIZE);
+        bits2 = new BitSet(TABLE_SIZE);
+
+        // Enumerations. When will they update this API?!
+        while (entries.hasMoreElements()) {
+            JarEntry entry = entries.nextElement();
+            String name = entry.getName();
+            int startPos = 0;
+
+            // If the path starts with a slash, that's not useful information.
+            // Skipping it increases the significance of our key by
+            // removing an insignificant character.
+            boolean precedingSlash = name.charAt(0) == '/';
+            if (precedingSlash) {
+                startPos = 1;
+            }
+
+            // Find the correct table slot
+            int pathHash1 = hashcode(name, startPos, HASH_PRIME_1);
+            int pathHash2 = hashcode(name, startPos, HASH_PRIME_2);
+
+            bits1.set(pathHash1 % TABLE_SIZE);
+            bits2.set(pathHash2 % TABLE_SIZE);
+        }
+    }
+
+    /**
+     * Simple hashcode of a portion of the string. Typically we would use
+     * substring, but memory and runtime speed are critical.
+     *
+     * @param content
+     *            Wrapping String.
+     * @param startPos
+     *            First character in the range.
+     * @return hashcode of the range.
+     */
+    private int hashcode(String content, int startPos, int hashPrime) {
+        int h = hashPrime/2;
+        int contentLength = content.length();
+        for (int i = startPos; i < contentLength; i++) {
+            h = hashPrime * h + content.charAt(i);
+        }
+
+        if (h < 0) {
+            h = h * -1;
+        }
+        return h;
+    }
+
+
+    /**
+     * Method that identifies whether a given path <b>MIGHT</b> be in this jar.
+     * Uses the Bloom filter mechanism.
+     *
+     * @param path
+     *            Requested path. Sometimes starts with "/WEB-INF/classes".
+     * @param webappRoot
+     *            The value of the webapp location, which can be stripped from
+     *            the path. Typically is "/WEB-INF/classes".
+     * @return Whether the prefix of the path is known to be in this jar.
+     */
+    public final boolean mightContainResource(String path, String webappRoot) {
+        int startPos = 0;
+        if (path.startsWith(webappRoot)) {
+            startPos = webappRoot.length();
+        }
+
+        if (path.charAt(startPos) == '/') {
+            // ignore leading slash
+            startPos++;
+        }
+
+        // Find the correct table slot
+        int pathHash1 = hashcode(path, startPos, HASH_PRIME_1);
+        int pathHash2 = hashcode(path, startPos, HASH_PRIME_2);
+        boolean probablyPresent = (bits1.get(pathHash1 % TABLE_SIZE) && 
bits2.get(pathHash2 % TABLE_SIZE));

Review comment:
       Wouldn't it be even faster, when you calculate pathHash2 lazyly, i.e. 
inlining the two int variables and use the fact, that the `&&` operator is lazy?
   The parenthesis seem to be not needed here, too.
   And last, why not return the result directly?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to