This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch 7.0.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/7.0.x by this push:
     new 89cbf02  Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=63235
89cbf02 is described below

commit 89cbf0252def59b999919b77af1211f44b41c98e
Author: Mark Thomas <ma...@apache.org>
AuthorDate: Fri Mar 8 20:45:38 2019 +0000

    Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=63235
    
    Implement an alternative Charset caching strategy that does not load all
    Charsets at Tomcat start. This reduces start time by ~30ms and does not,
    based on the performance tests included in this commit, have a negative
    impact on runtime look-ups.
    It does require that the names of all supported Charsets are known to
    Tomcat at compile time. The code has been tested with a range of JVMs
    and a unit test is provided for testing new JVMs.
---
 java/org/apache/tomcat/util/buf/B2CConverter.java  |  23 +---
 java/org/apache/tomcat/util/buf/CharsetCache.java  | 134 ++++++++++++++++++++
 .../apache/tomcat/util/buf/TestCharsetCache.java   |  65 ++++++++++
 .../util/buf/TestCharsetCachePerformance.java      | 141 +++++++++++++++++++++
 webapps/docs/changelog.xml                         |   3 +
 5 files changed, 350 insertions(+), 16 deletions(-)

diff --git a/java/org/apache/tomcat/util/buf/B2CConverter.java 
b/java/org/apache/tomcat/util/buf/B2CConverter.java
index 4492fbd..31ad092 100644
--- a/java/org/apache/tomcat/util/buf/B2CConverter.java
+++ b/java/org/apache/tomcat/util/buf/B2CConverter.java
@@ -24,9 +24,7 @@ import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CoderResult;
 import java.nio.charset.CodingErrorAction;
-import java.util.HashMap;
 import java.util.Locale;
-import java.util.Map;
 
 import org.apache.tomcat.util.res.StringManager;
 
@@ -38,8 +36,7 @@ public class B2CConverter {
     private static final StringManager sm =
         StringManager.getManager(Constants.Package);
 
-    private static final Map<String, Charset> encodingToCharsetCache =
-        new HashMap<String, Charset>();
+    private static final CharsetCache charsetCache;
 
     public static final Charset ISO_8859_1;
     public static final Charset UTF_8;
@@ -48,14 +45,8 @@ public class B2CConverter {
     protected static final int LEFTOVER_SIZE = 9;
 
     static {
-        for (Charset charset: Charset.availableCharsets().values()) {
-            encodingToCharsetCache.put(
-                    charset.name().toLowerCase(Locale.ENGLISH), charset);
-            for (String alias : charset.aliases()) {
-                encodingToCharsetCache.put(
-                        alias.toLowerCase(Locale.ENGLISH), charset);
-            }
-        }
+        charsetCache = new CharsetCache();
+
         Charset iso88591 = null;
         Charset utf8 = null;
         try {
@@ -84,7 +75,7 @@ public class B2CConverter {
     public static Charset getCharsetLower(String lowerCaseEnc)
             throws UnsupportedEncodingException {
 
-        Charset charset = encodingToCharsetCache.get(lowerCaseEnc);
+        Charset charset = charsetCache.getCharset(lowerCaseEnc);
 
         if (charset == null) {
             // Pre-population of the cache means this must be invalid
@@ -130,7 +121,7 @@ public class B2CConverter {
         decoder.onUnmappableCharacter(action);
     }
 
-    /** 
+    /**
      * Reset the decoder state.
      */
     public void recycle() {
@@ -140,7 +131,7 @@ public class B2CConverter {
 
     /**
      * Convert the given bytes to characters.
-     * 
+     *
      * @param bc byte input
      * @param cc char output
      * @param endOfInput    Is this all of the available data
@@ -157,7 +148,7 @@ public class B2CConverter {
         }
         if ((cb == null) || (cb.array() != cc.getBuffer())) {
             // Create a new char buffer if anything changed
-            cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), 
+            cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
                     cc.getBuffer().length - cc.getEnd());
         } else {
             // Initialize the char buffer
diff --git a/java/org/apache/tomcat/util/buf/CharsetCache.java 
b/java/org/apache/tomcat/util/buf/CharsetCache.java
new file mode 100644
index 0000000..bde4583
--- /dev/null
+++ b/java/org/apache/tomcat/util/buf/CharsetCache.java
@@ -0,0 +1,134 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.util.Locale;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+public class CharsetCache {
+
+    private static final String[] INITIAL_CHARSETS = new String[] { 
"iso-8859-1", "utf-8" };
+
+    /*
+     * Tested with:
+     *  - Oracle JDK 8 u192
+     *  - OpenJDK 13 EA 4
+     */
+    private static final String[] LAZY_CHARSETS = new String[] {
+            "big5", "big5-hkscs", "cesu-8", "euc-jp", "euc-kr", "gb18030", 
"gb2312", "gbk", "ibm-thai", "ibm00858",
+            "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", 
"ibm01145", "ibm01146", "ibm01147", "ibm01148",
+            "ibm01149", "ibm037", "ibm1026", "ibm1047", "ibm273", "ibm277", 
"ibm278", "ibm280", "ibm284", "ibm285",
+            "ibm290", "ibm297", "ibm420", "ibm424", "ibm437", "ibm500", 
"ibm775", "ibm850", "ibm852", "ibm855",
+            "ibm857", "ibm860", "ibm861", "ibm862", "ibm863", "ibm864", 
"ibm865", "ibm866", "ibm868", "ibm869",
+            "ibm870", "ibm871", "ibm918", "iso-2022-cn", "iso-2022-jp", 
"iso-2022-jp-2", "iso-2022-kr", "iso-8859-13",
+            "iso-8859-15", "iso-8859-2", "iso-8859-3", "iso-8859-4", 
"iso-8859-5", "iso-8859-6", "iso-8859-7",
+            "iso-8859-8", "iso-8859-9", "iso-8859-16", "jis_x0201", 
"jis_x0212-1990", "koi8-r", "koi8-u", "shift_jis",
+            "tis-620", "us-ascii", "utf-16", "utf-16be", "utf-16le", "utf-32", 
"utf-32be", "utf-32le", "x-utf-32be-bom",
+            "x-utf-32le-bom", "windows-1250", "windows-1251", "windows-1252", 
"windows-1253", "windows-1254",
+            "windows-1255", "windows-1256", "windows-1257", "windows-1258", 
"windows-31j", "x-big5-hkscs-2001",
+            "x-big5-solaris", "x-compound_text", "x-euc-tw", "x-ibm1006", 
"x-ibm1025", "x-ibm1046", "x-ibm1097",
+            "x-ibm1098", "x-ibm1112", "x-ibm1122", "x-ibm1123", "x-ibm1124", 
"x-ibm1129", "x-ibm1166", "x-ibm1364",
+            "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722", "x-ibm737", 
"x-ibm833", "x-ibm834", "x-ibm856",
+            "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930", 
"x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939",
+            "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c", "x-ibm948", 
"x-ibm949", "x-ibm949c", "x-ibm950",
+            "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns", 
"x-iso-2022-cn-gb", "x-jis0208",
+            "x-jisautodetect", "x-johab", "x-ms932_0213", "x-ms950-hkscs", 
"x-ms950-hkscs-xp", "x-macarabic",
+            "x-maccentraleurope", "x-maccroatian", "x-maccyrillic", 
"x-macdingbat", "x-macgreek", "x-machebrew",
+            "x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", 
"x-macthai", "x-macturkish", "x-macukraine",
+            "x-pck", "x-sjis_0213", "x-utf-16le-bom", "x-euc-jp-linux", 
"x-eucjp-open", "x-iso-8859-11", "x-mswin-936",
+            "x-windows-50220", "x-windows-50221", "x-windows-874", 
"x-windows-949", "x-windows-950",
+            "x-windows-iso2022jp"
+            };
+
+    private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy",  
null);
+
+    private ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();
+
+    public CharsetCache() {
+        // Pre-populate the cache
+        for (String charsetName : INITIAL_CHARSETS) {
+            Charset charset = Charset.forName(charsetName);
+            addToCache(charsetName, charset);
+        }
+
+        for (String charsetName : LAZY_CHARSETS) {
+            addToCache(charsetName, DUMMY_CHARSET);
+        }
+    }
+
+
+    private void addToCache(String name, Charset charset) {
+        cache.put(name, charset);
+        for (String alias : charset.aliases()) {
+            cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
+        }
+    }
+
+
+    public Charset getCharset(String charsetName) {
+        String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);
+
+        Charset result = cache.get(lcCharsetName);
+
+        if (result == DUMMY_CHARSET) {
+            // Name is known but the Charset is not in the cache
+            Charset charset = Charset.forName(lcCharsetName);
+            if (charset == null) {
+                // Charset not available in this JVM - remove cache entry
+                cache.remove(lcCharsetName);
+                result = null;
+            } else {
+                // Charset is available - populate cache entry
+                addToCache(lcCharsetName, charset);
+                result = charset;
+            }
+        }
+
+        return result;
+    }
+
+
+    /*
+     * Placeholder Charset implementation for entries that will be loaded 
lazily
+     * into the cache.
+     */
+    private static class DummyCharset extends Charset {
+
+        protected DummyCharset(String canonicalName, String[] aliases) {
+            super(canonicalName, aliases);
+        }
+
+        @Override
+        public boolean contains(Charset cs) {
+            return false;
+        }
+
+        @Override
+        public CharsetDecoder newDecoder() {
+            return null;
+        }
+
+        @Override
+        public CharsetEncoder newEncoder() {
+            return null;
+        }
+    }
+}
diff --git a/test/org/apache/tomcat/util/buf/TestCharsetCache.java 
b/test/org/apache/tomcat/util/buf/TestCharsetCache.java
new file mode 100644
index 0000000..011b08c
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestCharsetCache.java
@@ -0,0 +1,65 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestCharsetCache {
+
+    @Test
+    public void testAllKnownCharsets() {
+        CharsetCache cache = new CharsetCache();
+
+        List<String> cacheMisses = new ArrayList<>();
+
+        for (Charset charset: Charset.availableCharsets().values()) {
+            if (cache.getCharset(charset.name()) == null) {
+                cacheMisses.add(charset.name());
+            } else {
+                for (String alias : charset.aliases()) {
+                    if (cache.getCharset(alias) == null) {
+                        cacheMisses.add(alias);
+                    }
+                }
+            }
+        }
+
+        if (cacheMisses.size() != 0) {
+            StringBuilder sb = new StringBuilder();
+            Collections.sort(cacheMisses);
+            for (String name : cacheMisses) {
+                if (sb.length() == 0) {
+                    sb.append('"');
+                } else {
+                    sb.append(", \"");
+                }
+                sb.append(name.toLowerCase(Locale.ENGLISH));
+                sb.append('"');
+            }
+            System.out.println(sb.toString());
+        }
+
+        Assert.assertTrue(cacheMisses.size() == 0);
+    }
+}
diff --git a/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java 
b/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java
new file mode 100644
index 0000000..6823890
--- /dev/null
+++ b/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java
@@ -0,0 +1,141 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.junit.Test;
+
+public class TestCharsetCachePerformance {
+
+    @Test
+    public void testNoCsCache() throws Exception {
+        doTest(new NoCsCache());
+    }
+
+
+    @Test
+    public void testFullCsCache() throws Exception {
+        doTest(new FullCsCache());
+    }
+
+
+    @Test
+    public void testLazyCsCache() throws Exception {
+        doTest(new LazyCsCache());
+    }
+
+
+    private void doTest(CsCache cache) throws Exception {
+        int threadCount = 10;
+        int iterations = 10000000;
+        String[] lookupNames = new String[] {
+                "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", 
"ISO-8859-5" };
+
+        Thread[] threads = new Thread[threadCount];
+
+        for (int i = 0; i < threadCount; i++) {
+            threads[i] = new TestCsCacheThread(iterations, cache, lookupNames);
+        }
+
+        long startTime = System.nanoTime();
+
+        for (int i = 0; i < threadCount; i++) {
+            threads[i].start();
+        }
+
+        for (int i = 0; i < threadCount; i++) {
+            threads[i].join();
+        }
+
+        long endTime = System.nanoTime();
+
+        System.out.println(cache.getClass().getName() + ": " + (endTime - 
startTime) + "ns");
+    }
+
+
+    private static interface CsCache {
+        Charset getCharset(String charsetName);
+    }
+
+
+    private static class NoCsCache implements CsCache {
+
+        @Override
+        public Charset getCharset(String charsetName) {
+            return Charset.forName(charsetName);
+        }
+    }
+
+
+    private static class FullCsCache implements CsCache {
+
+        private static final Map<String,Charset> cache = new HashMap<>();
+
+        static {
+            for (Charset charset: Charset.availableCharsets().values()) {
+                cache.put(charset.name().toLowerCase(Locale.ENGLISH), charset);
+                for (String alias : charset.aliases()) {
+                    cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
+                }
+            }
+        }
+
+
+        @Override
+        public Charset getCharset(String charsetName) {
+            return cache.get(charsetName.toLowerCase(Locale.ENGLISH));
+        }
+    }
+
+
+    private static class LazyCsCache implements CsCache {
+
+        private CharsetCache cache = new CharsetCache();
+
+        @Override
+        public Charset getCharset(String charsetName) {
+            return cache.getCharset(charsetName);
+        }
+    }
+
+
+    private static class TestCsCacheThread extends Thread {
+
+        private final int iterations;
+        private final CsCache cache;
+        private final String[] lookupNames;
+        private final int lookupNamesCount;
+
+        public TestCsCacheThread(int iterations, CsCache cache, String[] 
lookupNames) {
+            this.iterations = iterations;
+            this.cache = cache;
+            this.lookupNames = lookupNames;
+            this.lookupNamesCount = lookupNames.length;
+        }
+
+        @Override
+        public void run() {
+            for (int i = 0; i < iterations; i++) {
+                cache.getCharset(lookupNames[i % lookupNamesCount]);
+            }
+        }
+    }
+}
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 3256fba..c628646 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -74,6 +74,9 @@
         <code>roleNested</code> set to <code>true</code>. (markt)
       </fix>
       <fix>
+        <bug>63235</bug>: Refactor Charset cache to reduce start time. (markt)
+      </fix>
+      <fix>
         <bug>63236</bug>: Use <code>String.intern()</code> as suggested by
         Phillip Webb to reduce memory wasted due to String duplication. This
         changes saves ~245k when starting a clean installation. With additional


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to