This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push: new 6687023 [CALCITE-2887] Improve performance of RexLiteral#toJavaString 6687023 is described below commit 6687023e9517fba00c5eeb85cc7b94efa0a6b9c0 Author: Stamatis Zampetakis <zabe...@gmail.com> AuthorDate: Fri Mar 1 18:24:41 2019 +0100 [CALCITE-2887] Improve performance of RexLiteral#toJavaString 1. Replace combination of PrintWriter/StringWriter with StringBuilder. 2. Add JMH benchmark showing a 10X improvement when choosing StringBuilder. 3. Remove unused method from Unsafe. 4. Improve Javadoc of modified methods. --- .../java/org/apache/calcite/rex/RexLiteral.java | 253 +++++++++++---------- .../main/java/org/apache/calcite/util/Unsafe.java | 6 - .../main/java/org/apache/calcite/util/Util.java | 35 +-- .../benchmarks/StringConstructBenchmark.java | 172 ++++++++++++++ 4 files changed, 323 insertions(+), 143 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/rex/RexLiteral.java b/core/src/main/java/org/apache/calcite/rex/RexLiteral.java index aa78730..192a962 100644 --- a/core/src/main/java/org/apache/calcite/rex/RexLiteral.java +++ b/core/src/main/java/org/apache/calcite/rex/RexLiteral.java @@ -35,14 +35,13 @@ import org.apache.calcite.util.Litmus; import org.apache.calcite.util.NlsString; import org.apache.calcite.util.TimeString; import org.apache.calcite.util.TimestampString; -import org.apache.calcite.util.Unsafe; import org.apache.calcite.util.Util; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import java.io.IOException; import java.io.PrintWriter; -import java.io.StringWriter; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.charset.Charset; @@ -382,21 +381,19 @@ public class RexLiteral extends RexNode { if (value == null) { return includeType == RexDigestIncludeType.NO_TYPE ? "null" : "null:" + fullTypeString; } - StringWriter sw = new StringWriter(); - PrintWriter pw = new PrintWriter(sw); - printAsJava(value, pw, typeName, false, includeType); - pw.flush(); + StringBuilder sb = new StringBuilder(); + appendAsJava(value, sb, typeName, false, includeType); if (includeType != RexDigestIncludeType.NO_TYPE) { - sw.append(':'); + sb.append(':'); if (!fullTypeString.endsWith("NOT NULL")) { - sw.append(fullTypeString); + sb.append(fullTypeString); } else { // Trim " NOT NULL". Apparently, the literal is not null, so we just print the data type. - Unsafe.append(sw, fullTypeString, 0, fullTypeString.length() - 9); + sb.append(fullTypeString, 0, fullTypeString.length() - 9); } } - return sw.toString(); + return sb.toString(); } /** @@ -551,14 +548,14 @@ public class RexLiteral extends RexNode { * Prints the value this literal as a Java string constant. */ public void printAsJava(PrintWriter pw) { - printAsJava(value, pw, typeName, true, RexDigestIncludeType.NO_TYPE); + appendAsJava(value, pw, typeName, true, RexDigestIncludeType.NO_TYPE); } /** - * Prints a value as a Java string. The value must be consistent with the - * type, as per {@link #valueMatchesType}. + * Appends the specified value in the provided destination as a Java string. The value must be + * consistent with the type, as per {@link #valueMatchesType}. * - * <p>Typical return values: + * <p>Typical return values:</p> * * <ul> * <li>true</li> @@ -567,122 +564,130 @@ public class RexLiteral extends RexNode { * <li>1.25</li> * <li>1234ABCD</li> * </ul> - * @param value Value - * @param pw Writer to write to - * @param typeName Type family - * @param includeType if representation should include data type + * + * <p>The destination where the value is appended must not incur I/O operations. This method is + * not meant to be used for writing the values to permanent storage.</p> + * + * @param value a value to be appended to the provided destination as a Java string + * @param destination a destination where to append the specified value + * @param typeName a type name to be used for the transformation of the value to a Java string + * @param includeType an indicator whether to include the data type in the Java representation + * @throws IllegalStateException if the appending to the destination <code>Appendable</code> fails + * due to I/O */ - private static void printAsJava( + private static void appendAsJava( Comparable value, - PrintWriter pw, + Appendable destination, SqlTypeName typeName, boolean java, RexDigestIncludeType includeType) { - switch (typeName) { - case CHAR: - NlsString nlsString = (NlsString) value; - if (java) { - Util.printJavaString( - pw, - nlsString.getValue(), - true); - } else { - boolean includeCharset = - (nlsString.getCharsetName() != null) - && !nlsString.getCharsetName().equals( - CalciteSystemProperty.DEFAULT_CHARSET.value()); - pw.print(nlsString.asSql(includeCharset, false)); - } - break; - case BOOLEAN: - assert value instanceof Boolean; - pw.print(((Boolean) value).booleanValue()); - break; - case DECIMAL: - assert value instanceof BigDecimal; - pw.print(value.toString()); - break; - case DOUBLE: - assert value instanceof BigDecimal; - pw.print(Util.toScientificNotation((BigDecimal) value)); - break; - case BIGINT: - assert value instanceof BigDecimal; - pw.print(((BigDecimal) value).longValue()); - pw.print('L'); - break; - case BINARY: - assert value instanceof ByteString; - pw.print("X'"); - pw.print(((ByteString) value).toString(16)); - pw.print("'"); - break; - case NULL: - assert value == null; - pw.print("null"); - break; - case SYMBOL: - assert value instanceof Enum; - pw.print("FLAG("); - pw.print(value); - pw.print(")"); - break; - case DATE: - assert value instanceof DateString; - pw.print(value); - break; - case TIME: - assert value instanceof TimeString; - pw.print(value); - break; - case TIME_WITH_LOCAL_TIME_ZONE: - assert value instanceof TimeString; - pw.print(value); - break; - case TIMESTAMP: - assert value instanceof TimestampString; - pw.print(value); - break; - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - assert value instanceof TimestampString; - pw.print(value); - break; - case INTERVAL_YEAR: - case INTERVAL_YEAR_MONTH: - case INTERVAL_MONTH: - case INTERVAL_DAY: - case INTERVAL_DAY_HOUR: - case INTERVAL_DAY_MINUTE: - case INTERVAL_DAY_SECOND: - case INTERVAL_HOUR: - case INTERVAL_HOUR_MINUTE: - case INTERVAL_HOUR_SECOND: - case INTERVAL_MINUTE: - case INTERVAL_MINUTE_SECOND: - case INTERVAL_SECOND: - if (value instanceof BigDecimal) { - pw.print(value.toString()); - } else { + try { + switch (typeName) { + case CHAR: + NlsString nlsString = (NlsString) value; + if (java) { + Util.printJavaString( + destination, + nlsString.getValue(), + true); + } else { + boolean includeCharset = + (nlsString.getCharsetName() != null) + && !nlsString.getCharsetName().equals( + CalciteSystemProperty.DEFAULT_CHARSET.value()); + destination.append(nlsString.asSql(includeCharset, false)); + } + break; + case BOOLEAN: + assert value instanceof Boolean; + destination.append(value.toString()); + break; + case DECIMAL: + assert value instanceof BigDecimal; + destination.append(value.toString()); + break; + case DOUBLE: + assert value instanceof BigDecimal; + destination.append(Util.toScientificNotation((BigDecimal) value)); + break; + case BIGINT: + assert value instanceof BigDecimal; + long narrowLong = ((BigDecimal) value).longValue(); + destination.append(String.valueOf(narrowLong)); + destination.append('L'); + break; + case BINARY: + assert value instanceof ByteString; + destination.append("X'"); + destination.append(((ByteString) value).toString(16)); + destination.append("'"); + break; + case NULL: assert value == null; - pw.print("null"); - } - break; - case MULTISET: - case ROW: - @SuppressWarnings("unchecked") final List<RexLiteral> list = (List) value; - pw.print( - new AbstractList<String>() { - public String get(int index) { - return list.get(index).computeDigest(includeType); - } + destination.append("null"); + break; + case SYMBOL: + assert value instanceof Enum; + destination.append("FLAG("); + destination.append(value.toString()); + destination.append(")"); + break; + case DATE: + assert value instanceof DateString; + destination.append(value.toString()); + break; + case TIME: + assert value instanceof TimeString; + destination.append(value.toString()); + break; + case TIME_WITH_LOCAL_TIME_ZONE: + assert value instanceof TimeString; + destination.append(value.toString()); + break; + case TIMESTAMP: + assert value instanceof TimestampString; + destination.append(value.toString()); + break; + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + assert value instanceof TimestampString; + destination.append(value.toString()); + break; + case INTERVAL_YEAR: + case INTERVAL_YEAR_MONTH: + case INTERVAL_MONTH: + case INTERVAL_DAY: + case INTERVAL_DAY_HOUR: + case INTERVAL_DAY_MINUTE: + case INTERVAL_DAY_SECOND: + case INTERVAL_HOUR: + case INTERVAL_HOUR_MINUTE: + case INTERVAL_HOUR_SECOND: + case INTERVAL_MINUTE: + case INTERVAL_MINUTE_SECOND: + case INTERVAL_SECOND: + assert value instanceof BigDecimal; + destination.append(value.toString()); + break; + case MULTISET: + case ROW: + @SuppressWarnings("unchecked") + final List<RexLiteral> list = (List) value; + destination.append( + (new AbstractList<String>() { + public String get(int index) { + return list.get(index).computeDigest(includeType); + } - public int size() { - return list.size(); - } - }); - break; - default: - assert valueMatchesType(value, typeName, true); - throw Util.needToImplement(typeName); + public int size() { + return list.size(); + } + }).toString()); + break; + default: + assert valueMatchesType(value, typeName, true); + throw Util.needToImplement(typeName); + } + } catch (IOException e) { + throw new IllegalStateException("The destination Appendable should not incur I/O.", e); } } diff --git a/core/src/main/java/org/apache/calcite/util/Unsafe.java b/core/src/main/java/org/apache/calcite/util/Unsafe.java index 39cce2e..61805ab 100644 --- a/core/src/main/java/org/apache/calcite/util/Unsafe.java +++ b/core/src/main/java/org/apache/calcite/util/Unsafe.java @@ -49,12 +49,6 @@ public class Unsafe { // Included in this class because StringBuffer is banned. sw.getBuffer().setLength(0); } - - /** Appends to {@link StringWriter}. */ - public static void append(StringWriter sw, CharSequence charSequence, int start, int end) { - // Included in this class because StringBuffer is banned. - sw.getBuffer().append(charSequence, start, end); - } } // End Unsafe.java diff --git a/core/src/main/java/org/apache/calcite/util/Util.java b/core/src/main/java/org/apache/calcite/util/Util.java index 2544481..b4b3148 100644 --- a/core/src/main/java/org/apache/calcite/util/Util.java +++ b/core/src/main/java/org/apache/calcite/util/Util.java @@ -479,24 +479,33 @@ public class Util { * Prints a string, enclosing in double quotes (") and escaping if * necessary. For examples, <code>printDoubleQuoted(w,"x\"y",false)</code> * prints <code>"x\"y"</code>. + * + * <p>The appendable where the value is printed must not incur I/O operations. This method is + * not meant to be used for writing the values to permanent storage.</p> + * + * @throws IllegalStateException if the print to the specified appendable fails due to I/O */ public static void printJavaString( - PrintWriter pw, + Appendable appendable, String s, boolean nullMeansNull) { - if (s == null) { - if (nullMeansNull) { - pw.print("null"); + try { + if (s == null) { + if (nullMeansNull) { + appendable.append("null"); + } + } else { + String s1 = replace(s, "\\", "\\\\"); + String s2 = replace(s1, "\"", "\\\""); + String s3 = replace(s2, "\n\r", "\\n"); + String s4 = replace(s3, "\n", "\\n"); + String s5 = replace(s4, "\r", "\\r"); + appendable.append('"'); + appendable.append(s5); + appendable.append('"'); } - } else { - String s1 = replace(s, "\\", "\\\\"); - String s2 = replace(s1, "\"", "\\\""); - String s3 = replace(s2, "\n\r", "\\n"); - String s4 = replace(s3, "\n", "\\n"); - String s5 = replace(s4, "\r", "\\r"); - pw.print("\""); - pw.print(s5); - pw.print("\""); + } catch (IOException ioe) { + throw new IllegalStateException("The specified appendable should not incur I/O.", ioe); } } diff --git a/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java b/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java new file mode 100644 index 0000000..c340dbb --- /dev/null +++ b/ubenchmark/src/main/java/org/apache/calcite/benchmarks/StringConstructBenchmark.java @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.benchmarks; + + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.io.Writer; +import java.util.concurrent.TimeUnit; + +/** + * A benchmark of the most common patterns that are used to construct gradually String objects. + * + * The benchmark emphasizes on the build patterns that appear in the Calcite project. + */ +@Fork(value = 1, jvmArgsPrepend = "-Xmx2048m") +@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS) +@Warmup(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS) +@Threads(1) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@BenchmarkMode(Mode.Throughput) +public class StringConstructBenchmark { + + /** + * A state holding a Writer object which is initialized only once at the beginning of the + * benchmark. + */ + @State(Scope.Thread) + public static class WriterState { + public Writer writer; + + @Setup(Level.Trial) + public void setup() { + this.writer = new StringWriter(); + } + } + + /** + * A state holding an Appendable object which is initialized after a fixed number of append + * operations. + */ + @State(Scope.Thread) + public static class AppenderState { + /** + * The type of the appender to be initialised. + */ + @Param({"StringBuilder", "StringWriter", "PrintWriter"}) + public String appenderType; + + /** + * The maximum number of appends before resetting the appender. + * + * If the value is small then the appender is reinitialized very often, making the instantiation + * of the appender the dominant operation of the benchmark. + */ + @Param({"1", "256", "512", "1024"}) + public int maxAppends; + + /** + * The appender that is currently used. + */ + private Appendable appender; + + /** + * The number of append operations performed so far. + */ + private int nAppends = 0; + + @Setup(Level.Iteration) + public void setup() { + reset(); + } + + private void reset() { + nAppends = 0; + if (appenderType.equals("StringBuilder")) { + this.appender = new StringBuilder(); + } else if (appenderType.equals("StringWriter")) { + this.appender = new StringWriter(); + } else if (appenderType.equals("PrintWriter")) { + this.appender = new PrintWriter(new StringWriter()); + } else { + throw new IllegalStateException( + "The specified appender type (" + appenderType + ") is not supported."); + } + } + + Appendable getOrCreateAppender() { + if (nAppends >= maxAppends) { + reset(); + } + nAppends++; + return appender; + } + + } + + @Benchmark + public StringBuilder initStringBuilder() { + return new StringBuilder(); + } + + @Benchmark + public StringWriter initStringWriter() { + return new StringWriter(); + } + + @Benchmark + public PrintWriter initPrintWriter(WriterState writerState) { + return new PrintWriter(writerState.writer); + } + + /** + * Benchmarks the performance of instantiating different {@link Appendable} objects and appending + * the same string a fixed number of times. + * + * @param bh blackhole used as an optimization fence + * @param appenderState the state holds the type of the appender and the number of appends that + * need to be performed before resetting the appender + * @throws IOException if the append operation encounters an I/O problem + */ + @Benchmark + public void appendString(Blackhole bh, AppenderState appenderState) throws IOException { + bh.consume(appenderState.getOrCreateAppender().append("placeholder")); + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(StringConstructBenchmark.class.getName()) + .detectJvmArgs() + .build(); + + new Runner(opt).run(); + } +} + +// End StringConstructBenchmark.java