Interesting benchmark results! Your microbenchmark technique looks unusual, but seems to work.
I'm surprised there is that much difference. I would take out the swallowing of Exception. --- Your data contains only supplementary characters, which we are assuming are very rare. So I don't consider speeding up such a benchmark very important, but.... We can do it for free by switching isSupplementaryCodePoint => isValidCodePoint, so why not? --- While checking this, I noticed that Character.toChars can be sped up by using our new isBMPCodePoint method (always optimize for BMP!) --- Here's the change I'm making on top of isBMPCodePoint: http://cr.openjdk.java.net/~martin/webrevs/openjdk7/isBMPCodePoint2/ Ulf, please review. diff --git a/src/share/classes/java/lang/Character.java b/src/share/classes/java/lang/Character.java --- a/src/share/classes/java/lang/Character.java +++ b/src/share/classes/java/lang/Character.java @@ -3099,15 +3099,15 @@ * @since 1.5 */ public static int toChars(int codePoint, char[] dst, int dstIndex) { - if (codePoint < 0 || codePoint > MAX_CODE_POINT) { + if (isBMPCodePoint(codePoint)) { + dst[dstIndex] = (char) codePoint; + return 1; + } else if (isValidCodePoint(codePoint)) { + toSurrogates(codePoint, dst, dstIndex); + return 2; + } else { throw new IllegalArgumentException(); } - if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) { - dst[dstIndex] = (char) codePoint; - return 1; - } - toSurrogates(codePoint, dst, dstIndex); - return 2; } /** @@ -3127,15 +3127,15 @@ * @since 1.5 */ public static char[] toChars(int codePoint) { - if (codePoint < 0 || codePoint > MAX_CODE_POINT) { + if (isBMPCodePoint(codePoint)) { + return new char[] { (char) codePoint }; + } else if (isValidCodePoint(codePoint)) { + char[] result = new char[2]; + toSurrogates(codePoint, result, 0); + return result; + } else { throw new IllegalArgumentException(); } - if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) { - return new char[] { (char) codePoint }; - } - char[] result = new char[2]; - toSurrogates(codePoint, result, 0); - return result; } static void toSurrogates(int codePoint, char[] dst, int index) { diff --git a/src/share/classes/java/lang/String.java b/src/share/classes/java/lang/String.java --- a/src/share/classes/java/lang/String.java +++ b/src/share/classes/java/lang/String.java @@ -281,7 +281,7 @@ int c = codePoints[i]; if (Character.isBMPCodePoint(c)) n += 1; - else if (Character.isSupplementaryCodePoint(c)) + else if (Character.isValidCodePoint(c)) n += 2; else throw new IllegalArgumentException(Integer.toString(c)); } diff --git a/src/share/classes/sun/nio/cs/Surrogate.java b/src/share/classes/sun/nio/cs/Surrogate.java --- a/src/share/classes/sun/nio/cs/Surrogate.java +++ b/src/share/classes/sun/nio/cs/Surrogate.java @@ -294,7 +294,7 @@ dst.put((char)uc); error = null; return 1; - } else if (Character.isSupplementaryCodePoint(uc)) { + } else if (Character.isValidCodePoint(uc)) { if (dst.remaining() < 2) { error = CoderResult.OVERFLOW; return -1; @@ -338,7 +338,7 @@ da[dp] = (char)uc; error = null; return 1; - } else if (Character.isSupplementaryCodePoint(uc)) { + } else if (Character.isValidCodePoint(uc)) { if (dl - dp < 2) { error = CoderResult.OVERFLOW; return -1; Martin On Fri, Mar 19, 2010 at 14:46, Ulf Zibis <[email protected]> wrote: > Am 16.03.2010 23:35, schrieb Xueming Shen: >> >> Martin Buchholz wrote: >>> >>> The primary theory here is that branches are expensive, >>> and we are reducing them by one. >>> >> >> There are still two branches in new impl, if you count the "ifeq" and >> "if_icmpge"(?) >> >> We are trying to "optimize" this piece of code with the assumption that >> the new impl MIGHT help certain vm (hotspot?) >> to optimize certain use scenario (some consecutive usages), if the >> compiler and/or the vm are both smart enough at certain >> point, with no supporting benchmark data? > > I've finished the benchmark: > https://java-nio-charset-enhanced.dev.java.net/source/browse/java-nio-charset-enhanced/branches/JDK-7/j_l_Character_charCount/src/java/lang/CharacterBenchmark.java?rev=1006&view=log > > The results: > time1: 2316,213 ms ..à la Martin > time2: 1267,063 ms > time3: 1245,972 ms ..using isValidCodePoint > time4: 1467,570 ms ..validate version (slower, because of unreasonable > HotSpot optimizing, see "C2 optimization bug ?" in hotspot-compiler-dev > list) > > Here see the disassembly snippets: > https://java-nio-charset-enhanced.dev.java.net/source/browse/java-nio-charset-enhanced/branches/JDK-7/j_l_Character_charCount/log/PA_Character_compare.txt?rev=1007&view=markup >
