This is an automated email from the ASF dual-hosted git repository. markt pushed a commit to branch 7.0.x in repository https://gitbox.apache.org/repos/asf/tomcat.git
The following commit(s) were added to refs/heads/7.0.x by this push: new 89cbf02 Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=63235 89cbf02 is described below commit 89cbf0252def59b999919b77af1211f44b41c98e Author: Mark Thomas <ma...@apache.org> AuthorDate: Fri Mar 8 20:45:38 2019 +0000 Fix https://bz.apache.org/bugzilla/show_bug.cgi?id=63235 Implement an alternative Charset caching strategy that does not load all Charsets at Tomcat start. This reduces start time by ~30ms and does not, based on the performance tests included in this commit, have a negative impact on runtime look-ups. It does require that the names of all supported Charsets are known to Tomcat at compile time. The code has been tested with a range of JVMs and a unit test is provided for testing new JVMs. --- java/org/apache/tomcat/util/buf/B2CConverter.java | 23 +--- java/org/apache/tomcat/util/buf/CharsetCache.java | 134 ++++++++++++++++++++ .../apache/tomcat/util/buf/TestCharsetCache.java | 65 ++++++++++ .../util/buf/TestCharsetCachePerformance.java | 141 +++++++++++++++++++++ webapps/docs/changelog.xml | 3 + 5 files changed, 350 insertions(+), 16 deletions(-) diff --git a/java/org/apache/tomcat/util/buf/B2CConverter.java b/java/org/apache/tomcat/util/buf/B2CConverter.java index 4492fbd..31ad092 100644 --- a/java/org/apache/tomcat/util/buf/B2CConverter.java +++ b/java/org/apache/tomcat/util/buf/B2CConverter.java @@ -24,9 +24,7 @@ import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CoderResult; import java.nio.charset.CodingErrorAction; -import java.util.HashMap; import java.util.Locale; -import java.util.Map; import org.apache.tomcat.util.res.StringManager; @@ -38,8 +36,7 @@ public class B2CConverter { private static final StringManager sm = StringManager.getManager(Constants.Package); - private static final Map<String, Charset> encodingToCharsetCache = - new HashMap<String, Charset>(); + private static final CharsetCache charsetCache; public static final Charset ISO_8859_1; public static final Charset UTF_8; @@ -48,14 +45,8 @@ public class B2CConverter { protected static final int LEFTOVER_SIZE = 9; static { - for (Charset charset: Charset.availableCharsets().values()) { - encodingToCharsetCache.put( - charset.name().toLowerCase(Locale.ENGLISH), charset); - for (String alias : charset.aliases()) { - encodingToCharsetCache.put( - alias.toLowerCase(Locale.ENGLISH), charset); - } - } + charsetCache = new CharsetCache(); + Charset iso88591 = null; Charset utf8 = null; try { @@ -84,7 +75,7 @@ public class B2CConverter { public static Charset getCharsetLower(String lowerCaseEnc) throws UnsupportedEncodingException { - Charset charset = encodingToCharsetCache.get(lowerCaseEnc); + Charset charset = charsetCache.getCharset(lowerCaseEnc); if (charset == null) { // Pre-population of the cache means this must be invalid @@ -130,7 +121,7 @@ public class B2CConverter { decoder.onUnmappableCharacter(action); } - /** + /** * Reset the decoder state. */ public void recycle() { @@ -140,7 +131,7 @@ public class B2CConverter { /** * Convert the given bytes to characters. - * + * * @param bc byte input * @param cc char output * @param endOfInput Is this all of the available data @@ -157,7 +148,7 @@ public class B2CConverter { } if ((cb == null) || (cb.array() != cc.getBuffer())) { // Create a new char buffer if anything changed - cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), + cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), cc.getBuffer().length - cc.getEnd()); } else { // Initialize the char buffer diff --git a/java/org/apache/tomcat/util/buf/CharsetCache.java b/java/org/apache/tomcat/util/buf/CharsetCache.java new file mode 100644 index 0000000..bde4583 --- /dev/null +++ b/java/org/apache/tomcat/util/buf/CharsetCache.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.buf; + +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.util.Locale; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; + +public class CharsetCache { + + private static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" }; + + /* + * Tested with: + * - Oracle JDK 8 u192 + * - OpenJDK 13 EA 4 + */ + private static final String[] LAZY_CHARSETS = new String[] { + "big5", "big5-hkscs", "cesu-8", "euc-jp", "euc-kr", "gb18030", "gb2312", "gbk", "ibm-thai", "ibm00858", + "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145", "ibm01146", "ibm01147", "ibm01148", + "ibm01149", "ibm037", "ibm1026", "ibm1047", "ibm273", "ibm277", "ibm278", "ibm280", "ibm284", "ibm285", + "ibm290", "ibm297", "ibm420", "ibm424", "ibm437", "ibm500", "ibm775", "ibm850", "ibm852", "ibm855", + "ibm857", "ibm860", "ibm861", "ibm862", "ibm863", "ibm864", "ibm865", "ibm866", "ibm868", "ibm869", + "ibm870", "ibm871", "ibm918", "iso-2022-cn", "iso-2022-jp", "iso-2022-jp-2", "iso-2022-kr", "iso-8859-13", + "iso-8859-15", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", + "iso-8859-8", "iso-8859-9", "iso-8859-16", "jis_x0201", "jis_x0212-1990", "koi8-r", "koi8-u", "shift_jis", + "tis-620", "us-ascii", "utf-16", "utf-16be", "utf-16le", "utf-32", "utf-32be", "utf-32le", "x-utf-32be-bom", + "x-utf-32le-bom", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", + "windows-1255", "windows-1256", "windows-1257", "windows-1258", "windows-31j", "x-big5-hkscs-2001", + "x-big5-solaris", "x-compound_text", "x-euc-tw", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097", + "x-ibm1098", "x-ibm1112", "x-ibm1122", "x-ibm1123", "x-ibm1124", "x-ibm1129", "x-ibm1166", "x-ibm1364", + "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722", "x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856", + "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930", "x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939", + "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c", "x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950", + "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns", "x-iso-2022-cn-gb", "x-jis0208", + "x-jisautodetect", "x-johab", "x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-macarabic", + "x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew", + "x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine", + "x-pck", "x-sjis_0213", "x-utf-16le-bom", "x-euc-jp-linux", "x-eucjp-open", "x-iso-8859-11", "x-mswin-936", + "x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950", + "x-windows-iso2022jp" + }; + + private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy", null); + + private ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>(); + + public CharsetCache() { + // Pre-populate the cache + for (String charsetName : INITIAL_CHARSETS) { + Charset charset = Charset.forName(charsetName); + addToCache(charsetName, charset); + } + + for (String charsetName : LAZY_CHARSETS) { + addToCache(charsetName, DUMMY_CHARSET); + } + } + + + private void addToCache(String name, Charset charset) { + cache.put(name, charset); + for (String alias : charset.aliases()) { + cache.put(alias.toLowerCase(Locale.ENGLISH), charset); + } + } + + + public Charset getCharset(String charsetName) { + String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH); + + Charset result = cache.get(lcCharsetName); + + if (result == DUMMY_CHARSET) { + // Name is known but the Charset is not in the cache + Charset charset = Charset.forName(lcCharsetName); + if (charset == null) { + // Charset not available in this JVM - remove cache entry + cache.remove(lcCharsetName); + result = null; + } else { + // Charset is available - populate cache entry + addToCache(lcCharsetName, charset); + result = charset; + } + } + + return result; + } + + + /* + * Placeholder Charset implementation for entries that will be loaded lazily + * into the cache. + */ + private static class DummyCharset extends Charset { + + protected DummyCharset(String canonicalName, String[] aliases) { + super(canonicalName, aliases); + } + + @Override + public boolean contains(Charset cs) { + return false; + } + + @Override + public CharsetDecoder newDecoder() { + return null; + } + + @Override + public CharsetEncoder newEncoder() { + return null; + } + } +} diff --git a/test/org/apache/tomcat/util/buf/TestCharsetCache.java b/test/org/apache/tomcat/util/buf/TestCharsetCache.java new file mode 100644 index 0000000..011b08c --- /dev/null +++ b/test/org/apache/tomcat/util/buf/TestCharsetCache.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.buf; + +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; + +import org.junit.Assert; +import org.junit.Test; + +public class TestCharsetCache { + + @Test + public void testAllKnownCharsets() { + CharsetCache cache = new CharsetCache(); + + List<String> cacheMisses = new ArrayList<>(); + + for (Charset charset: Charset.availableCharsets().values()) { + if (cache.getCharset(charset.name()) == null) { + cacheMisses.add(charset.name()); + } else { + for (String alias : charset.aliases()) { + if (cache.getCharset(alias) == null) { + cacheMisses.add(alias); + } + } + } + } + + if (cacheMisses.size() != 0) { + StringBuilder sb = new StringBuilder(); + Collections.sort(cacheMisses); + for (String name : cacheMisses) { + if (sb.length() == 0) { + sb.append('"'); + } else { + sb.append(", \""); + } + sb.append(name.toLowerCase(Locale.ENGLISH)); + sb.append('"'); + } + System.out.println(sb.toString()); + } + + Assert.assertTrue(cacheMisses.size() == 0); + } +} diff --git a/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java b/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java new file mode 100644 index 0000000..6823890 --- /dev/null +++ b/test/org/apache/tomcat/util/buf/TestCharsetCachePerformance.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.util.buf; + +import java.nio.charset.Charset; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +import org.junit.Test; + +public class TestCharsetCachePerformance { + + @Test + public void testNoCsCache() throws Exception { + doTest(new NoCsCache()); + } + + + @Test + public void testFullCsCache() throws Exception { + doTest(new FullCsCache()); + } + + + @Test + public void testLazyCsCache() throws Exception { + doTest(new LazyCsCache()); + } + + + private void doTest(CsCache cache) throws Exception { + int threadCount = 10; + int iterations = 10000000; + String[] lookupNames = new String[] { + "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5" }; + + Thread[] threads = new Thread[threadCount]; + + for (int i = 0; i < threadCount; i++) { + threads[i] = new TestCsCacheThread(iterations, cache, lookupNames); + } + + long startTime = System.nanoTime(); + + for (int i = 0; i < threadCount; i++) { + threads[i].start(); + } + + for (int i = 0; i < threadCount; i++) { + threads[i].join(); + } + + long endTime = System.nanoTime(); + + System.out.println(cache.getClass().getName() + ": " + (endTime - startTime) + "ns"); + } + + + private static interface CsCache { + Charset getCharset(String charsetName); + } + + + private static class NoCsCache implements CsCache { + + @Override + public Charset getCharset(String charsetName) { + return Charset.forName(charsetName); + } + } + + + private static class FullCsCache implements CsCache { + + private static final Map<String,Charset> cache = new HashMap<>(); + + static { + for (Charset charset: Charset.availableCharsets().values()) { + cache.put(charset.name().toLowerCase(Locale.ENGLISH), charset); + for (String alias : charset.aliases()) { + cache.put(alias.toLowerCase(Locale.ENGLISH), charset); + } + } + } + + + @Override + public Charset getCharset(String charsetName) { + return cache.get(charsetName.toLowerCase(Locale.ENGLISH)); + } + } + + + private static class LazyCsCache implements CsCache { + + private CharsetCache cache = new CharsetCache(); + + @Override + public Charset getCharset(String charsetName) { + return cache.getCharset(charsetName); + } + } + + + private static class TestCsCacheThread extends Thread { + + private final int iterations; + private final CsCache cache; + private final String[] lookupNames; + private final int lookupNamesCount; + + public TestCsCacheThread(int iterations, CsCache cache, String[] lookupNames) { + this.iterations = iterations; + this.cache = cache; + this.lookupNames = lookupNames; + this.lookupNamesCount = lookupNames.length; + } + + @Override + public void run() { + for (int i = 0; i < iterations; i++) { + cache.getCharset(lookupNames[i % lookupNamesCount]); + } + } + } +} diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml index 3256fba..c628646 100644 --- a/webapps/docs/changelog.xml +++ b/webapps/docs/changelog.xml @@ -74,6 +74,9 @@ <code>roleNested</code> set to <code>true</code>. (markt) </fix> <fix> + <bug>63235</bug>: Refactor Charset cache to reduce start time. (markt) + </fix> + <fix> <bug>63236</bug>: Use <code>String.intern()</code> as suggested by Phillip Webb to reduce memory wasted due to String duplication. This changes saves ~245k when starting a clean installation. With additional --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org