I am worried, and almost -1, here: it is OK to copy from OpenJDK, but I bet it's not OK to copy from Oracle's JRE... But how can we properly benchmark then?
Gary ---------- Forwarded message ---------- From: <[email protected]> Date: Mon, Oct 5, 2015 at 4:14 PM Subject: [1/2] logging-log4j2 git commit: LOG4J2-1151 updated: added exact port of JDK 8 logic for handling invalid input, renamed methods To: [email protected] Repository: logging-log4j2 Updated Branches: refs/heads/master 2612b61e6 -> 8b9d10095 LOG4J2-1151 updated: added exact port of JDK 8 logic for handling invalid input, renamed methods Project: http://git-wip-us.apache.org/repos/asf/logging-log4j2/repo Commit: http://git-wip-us.apache.org/repos/asf/logging-log4j2/commit/1360daa6 Tree: http://git-wip-us.apache.org/repos/asf/logging-log4j2/tree/1360daa6 Diff: http://git-wip-us.apache.org/repos/asf/logging-log4j2/diff/1360daa6 Branch: refs/heads/master Commit: 1360daa694ed247b100d8d19336c92ecebd147ff Parents: 9bc8144 Author: rpopma <[email protected]> Authored: Tue Oct 6 01:14:18 2015 +0200 Committer: rpopma <[email protected]> Committed: Tue Oct 6 01:14:18 2015 +0200 ---------------------------------------------------------------------- .../log4j/perf/jmh/StringEncodingBenchmark.java | 151 +++++++++++++++++-- 1 file changed, 137 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/logging-log4j2/blob/1360daa6/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java ---------------------------------------------------------------------- diff --git a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java index 4159b2a..7e9459c 100644 --- a/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java +++ b/log4j-perf/src/main/java/org/apache/logging/log4j/perf/jmh/StringEncodingBenchmark.java @@ -35,7 +35,7 @@ import org.openjdk.jmh.annotations.State; * Tests Log4j2 StringEncoding performance. 
*/ // ============================== HOW TO RUN THIS TEST: ==================================== -//(Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am ) +// (Quick build: mvn -DskipTests=true clean package -pl log4j-perf -am ) // // java -jar log4j-perf/target/benchmarks.jar ".*StringEncoding.*" -f 1 -wi 5 -i 10 // @@ -63,70 +63,70 @@ public class StringEncodingBenchmark { @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytes() { + public byte[] defaultStringGetBytes() { return LOGMSG.getBytes(); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesString88591() throws Exception { + public byte[] iso8859_1StringGetBytesString() throws Exception { return LOGMSG.getBytes(STRING_ISO8859_1); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesCharSet88591() { + public byte[] iso8859_1StringGetBytesCharSet() { return LOGMSG.getBytes(CHARSET_ISO8859_1); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesStringUsAscii() throws Exception { + public byte[] usAsciiStringGetBytesString() throws Exception { return LOGMSG.getBytes(STRING_US_ASCII); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesCharSetUsAscii() { + public byte[] usAsciiStringGetBytesCharSet() { return LOGMSG.getBytes(CHARSET_US_ASCII); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesStringDefault() throws Exception { + public byte[] defaultStringGetBytesString() throws Exception { return LOGMSG.getBytes(DEFAULT_ENCODING); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesCharSetDefault() { + public byte[] defaultStringGetBytesCharSet() { return 
LOGMSG.getBytes(CHARSET_DEFAULT); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesStringShiftJIS() throws Exception { + public byte[] shiftJisStringGetBytesString() throws Exception { return LOGMSG.getBytes(STRING_SHIFT_JIS); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] stringGetBytesCharSetShiftJIS() { + public byte[] shiftJisStringGetBytesCharSet() { return LOGMSG.getBytes(CHARSET_SHIFT_JIS); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] encoderShiftJIS() throws CharacterCodingException { + public byte[] shiftJisEncoder() throws CharacterCodingException { ByteBuffer buf = ENCODER_SHIFT_JIS.encode(CharBuffer.wrap(LOGMSG)); return buf.array(); } @@ -134,7 +134,7 @@ public class StringEncodingBenchmark { @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] encoderIso8859_1() throws CharacterCodingException { + public byte[] iso8859_1Encoder() throws CharacterCodingException { ByteBuffer buf = ENCODER_ISO8859_1.encode(CharBuffer.wrap(LOGMSG)); return buf.array(); } @@ -142,12 +142,135 @@ public class StringEncodingBenchmark { @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) - public byte[] customIso8859_1() throws CharacterCodingException { + public byte[] iso8859_1CustomCastToByte() throws CharacterCodingException { final int length = LOGMSG.length(); final byte[] result = new byte[length]; for (int i = 0; i < length; i++) { - result[i] = (byte) LOGMSG.charAt(i); + final char c = LOGMSG.charAt(i); + result[i++] = (byte) c; } return result; } + + @Benchmark + @BenchmarkMode(Mode.SampleTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public byte[] iso8859_1CustomVerifyAndCast() throws CharacterCodingException { + final int length = LOGMSG.length(); + final byte[] result = new byte[length]; + int j = 0; + for (int i = 0; i < 
length; i++) { + final char c = LOGMSG.charAt(i); + if (c <= 255) { + result[j++] = (byte) c; + } else { + i = nonIsoChar(LOGMSG, i); + result[j++] = (byte) '?'; + } + } + return result; + } + + private int nonIsoChar(String logmsg, int i) { + char c = logmsg.charAt(i++); + if ((Character.isHighSurrogate(c)) && (i < logmsg.length()) && (Character.isLowSurrogate(logmsg.charAt(i)))) { + i++; + } + return i; + } + + @Benchmark + @BenchmarkMode(Mode.SampleTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public byte[] iso8859_1CustomPortedJDK8() throws CharacterCodingException { + final int length = LOGMSG.length(); + final byte[] result = new byte[length]; + encode(LOGMSG, 0, length, result); + return result; + } + + private static int encodeISOArray(String charArray, int charIndex, byte[] byteArray, int byteIndex, int length) { + int i = 0; + for (; i < length; i++) { + char c = charArray.charAt(charIndex++); + if (c > 255) { + break; + } + byteArray[(byteIndex++)] = ((byte) c); + } + return i; + } + + private int encode(String charArray, int charOffset, int charLength, byte[] byteArray) { + int offset = 0; + int length = Math.min(charLength, byteArray.length); + int charDoneIndex = charOffset + length; + while (charOffset < charDoneIndex) { + int m = encodeISOArray(charArray, charOffset, byteArray, offset, length); + charOffset += m; + offset += m; + if (m != length) { + char c = charArray.charAt(charOffset++); + if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex) + && (Character.isLowSurrogate(charArray.charAt(charOffset)))) { + if (charLength > byteArray.length) { + charDoneIndex++; + charLength--; + } + charOffset++; + } + byteArray[(offset++)] = '?'; + length = Math.min(charDoneIndex - charOffset, byteArray.length - offset); + } + } + return offset; + } + + @Benchmark + @BenchmarkMode(Mode.SampleTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public byte[] iso8859_1CustomPortedJDK8CopyArray() throws CharacterCodingException { + char[] charArray = 
LOGMSG.toCharArray(); + final int length = charArray.length; + final byte[] result = new byte[length]; + encode0(charArray, 0, length, result); + return result; + } + + private static int encodeISOArray0(char[] charArray, int charIndex, byte[] byteArray, int byteIndex, int length) { + int i = 0; + for (; i < length; i++) { + char c = charArray[(charIndex++)]; + if (c > 255) { + break; + } + byteArray[(byteIndex++)] = ((byte) c); + } + return i; + } + + private int encode0(char[] charArray, int charOffset, int charLength, byte[] byteArray) { + int offset = 0; + int length = Math.min(charLength, byteArray.length); + int charDoneIndex = charOffset + length; + while (charOffset < charDoneIndex) { + int m = encodeISOArray0(charArray, charOffset, byteArray, offset, length); + charOffset += m; + offset += m; + if (m != length) { + char c = charArray[(charOffset++)]; + if ((Character.isHighSurrogate(c)) && (charOffset < charDoneIndex) + && (Character.isLowSurrogate(charArray[(charOffset)]))) { + if (charLength > byteArray.length) { + charDoneIndex++; + charLength--; + } + charOffset++; + } + byteArray[(offset++)] = '?'; + length = Math.min(charDoneIndex - charOffset, byteArray.length - offset); + } + } + return offset; + } } -- E-Mail: [email protected] | [email protected] Java Persistence with Hibernate, Second Edition <http://www.manning.com/bauer3/> JUnit in Action, Second Edition <http://www.manning.com/tahchiev/> Spring Batch in Action <http://www.manning.com/templier/> Blog: http://garygregory.wordpress.com Home: http://garygregory.com/ Tweet! http://twitter.com/GaryGregory
