This is an automated email from the ASF dual-hosted git repository.
nehapawar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new ca900e4 Optimize GroovyExpressionEvaluator (#5257)
ca900e4 is described below
commit ca900e4f52f65738d5cb03ab10484f6c97bcd68f
Author: Neha Pawar <[email protected]>
AuthorDate: Thu Apr 16 12:56:46 2020 -0700
Optimize GroovyExpressionEvaluator (#5257)
---
.../perf/BenchmarkGroovyExpressionEvaluation.java | 190 +++++++++++++++++++++
.../evaluators/GroovyExpressionEvaluator.java | 39 ++---
.../spi/utils/SchemaFieldExtractorUtilsTest.java | 17 +-
3 files changed, 220 insertions(+), 26 deletions(-)
diff --git
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkGroovyExpressionEvaluation.java
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkGroovyExpressionEvaluation.java
new file mode 100644
index 0000000..170e537
--- /dev/null
+++
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkGroovyExpressionEvaluation.java
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.perf;
+
+import groovy.lang.Binding;
+import groovy.lang.GroovyClassLoader;
+import groovy.lang.GroovyCodeSource;
+import groovy.lang.GroovyShell;
+import groovy.lang.Script;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+import org.openjdk.jmh.runner.options.TimeValue;
+
+
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgs = {"-server", "-Xmx8G",
"-XX:MaxDirectMemorySize=16G"})
+public class BenchmarkGroovyExpressionEvaluation {
+
+ private final GroovyClassLoader _groovyClassLoader = new GroovyClassLoader();
+ private final Random _random = new Random();
+
+ private String _concatScriptText;
+ private GroovyCodeSource _concatCodeSource;
+ private String _maxScriptText;
+ private GroovyCodeSource _maxCodeSource;
+
+ private Binding _concatBinding;
+ private Script _concatScript;
+ private Script _concatGCLScript;
+ private Binding _maxBinding;
+ private Script _maxScript;
+ private Script _maxGCLScript;
+
+ @Setup
+ public void setup()
+ throws IllegalAccessException, InstantiationException {
+ _concatScriptText = "firstName + ' ' + lastName";
+ _concatBinding = new Binding();
+ _concatScript = new GroovyShell(_concatBinding).parse(_concatScriptText);
+ _concatCodeSource = new GroovyCodeSource(_concatScriptText,
Math.abs(_concatScriptText.hashCode()) + ".groovy",
+ GroovyShell.DEFAULT_CODE_BASE);
+ _concatGCLScript = (Script)
_groovyClassLoader.parseClass(_concatCodeSource).newInstance();
+
+ _maxScriptText = "longList.max{ it.toBigDecimal() }";
+ _maxBinding = new Binding();
+ _maxScript = new GroovyShell(_maxBinding).parse(_maxScriptText);
+ _maxCodeSource = new GroovyCodeSource(_maxScriptText,
Math.abs(_maxScriptText.hashCode()) + ".groovy",
+ GroovyShell.DEFAULT_CODE_BASE);
+ _maxGCLScript = (Script)
_groovyClassLoader.parseClass(_maxCodeSource).newInstance();
+ }
+
+ private String getFirstName() {
+ return RandomStringUtils.randomAlphabetic(10);
+ }
+
+ private String getLastName() {
+ return RandomStringUtils.randomAlphabetic(20);
+ }
+
+ private List<String> getLongList() {
+ int listLength = _random.nextInt(100) + 10;
+ List<String> longList = new ArrayList<>(listLength);
+ for (int i = 0; i < listLength; i++) {
+ longList.add(String.valueOf(_random.nextInt()));
+ }
+ return longList;
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void javaConcat() {
+ getFullNameJava(getFirstName(), getLastName());
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void groovyShellConcat() {
+ getFullNameGroovyShell(getFirstName(), getLastName());
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void groovyCodeSourceConcat() {
+ getFullNameGroovyCodeSource(getFirstName(), getLastName());
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void javaMax() {
+ List<String> longList = getLongList();
+ getMaxJava(longList);
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void groovyShellMax() {
+ List<String> longList = getLongList();
+ getMaxGroovyShell(longList);
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void groovyCodeSourceMax() {
+ List<String> longList = getLongList();
+ getMaxGroovyCodeSource(longList);
+ }
+
+ private String getFullNameJava(String firstName, String lastName) {
+ return String.join(" ", firstName, lastName);
+ }
+
+ private Object getFullNameGroovyShell(String firstName, String lastName) {
+ _concatBinding.setVariable("firstName", firstName);
+ _concatBinding.setVariable("lastName", lastName);
+ return _concatScript.run();
+ }
+
+ private Object getFullNameGroovyCodeSource(String firstName, String
lastName) {
+ _concatBinding.setVariable("firstName", firstName);
+ _concatBinding.setVariable("lastName", lastName);
+ _concatGCLScript.setBinding(_concatBinding);
+ return _concatGCLScript.run();
+ }
+
+ private int getMaxJava(List<String> longList) {
+ int maxInt = Integer.MIN_VALUE;
+ for (String value : longList) {
+ int number = Integer.parseInt(value);
+ if (number > maxInt) {
+ maxInt = number;
+ }
+ }
+ return maxInt;
+ }
+
+ private Object getMaxGroovyShell(List<String> longList) {
+ _maxBinding.setVariable("longList", longList);
+ return _maxScript.run();
+ }
+
+ private Object getMaxGroovyCodeSource(List<String> longList) {
+ _maxBinding.setVariable("longList", longList);
+ _maxGCLScript.setBinding(_maxBinding);
+ return _maxGCLScript.run();
+ }
+
+ public static void main(String[] args)
+ throws Exception {
+ ChainedOptionsBuilder opt = new
OptionsBuilder().include(BenchmarkGroovyExpressionEvaluation.class.getSimpleName())
+
.warmupTime(TimeValue.seconds(10)).warmupIterations(1).measurementTime(TimeValue.seconds(30))
+ .measurementIterations(3).forks(1);
+ new Runner(opt.build()).run();
+ }
+}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
index ba718e6..68ecfa6 100644
---
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
+++
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/function/evaluators/GroovyExpressionEvaluator.java
@@ -18,13 +18,13 @@
*/
package org.apache.pinot.spi.data.function.evaluators;
+import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import groovy.lang.Binding;
import groovy.lang.GroovyShell;
+import groovy.lang.Script;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.pinot.spi.data.readers.GenericRow;
@@ -54,21 +54,21 @@ public class GroovyExpressionEvaluator implements
ExpressionEvaluator {
private static final String SCRIPT_GROUP_NAME = "script";
private static final String ARGUMENTS_SEPARATOR = ",";
- private List<String> _arguments;
- private String _script;
+ private final List<String> _arguments;
+ private final Binding _binding;
+ private final Script _script;
public GroovyExpressionEvaluator(String transformExpression) {
Matcher matcher = GROOVY_FUNCTION_PATTERN.matcher(transformExpression);
- if (matcher.matches()) {
- _script = matcher.group(SCRIPT_GROUP_NAME);
-
- String arguments = matcher.group(ARGUMENTS_GROUP_NAME);
- if (arguments != null) {
- _arguments =
Splitter.on(ARGUMENTS_SEPARATOR).trimResults().splitToList(arguments);
- } else {
- _arguments = Collections.emptyList();
- }
+ Preconditions.checkState(matcher.matches(), "Invalid transform expression:
%s", transformExpression);
+ String arguments = matcher.group(ARGUMENTS_GROUP_NAME);
+ if (arguments != null) {
+ _arguments =
Splitter.on(ARGUMENTS_SEPARATOR).trimResults().splitToList(arguments);
+ } else {
+ _arguments = Collections.emptyList();
}
+ _binding = new Binding();
+ _script = new
GroovyShell(_binding).parse(matcher.group(SCRIPT_GROUP_NAME));
}
public static String getGroovyExpressionPrefix() {
@@ -82,21 +82,14 @@ public class GroovyExpressionEvaluator implements
ExpressionEvaluator {
@Override
public Object evaluate(GenericRow genericRow) {
- Map<String, Object> params = new HashMap<>();
for (String argument : _arguments) {
Object value = genericRow.getValue(argument);
if (value == null) {
- // TODO: disallow evaluation of any of the params is null? Or give
complete control to function?
+ // FIXME: if any param is null a) exit OR b) assume function handles
it ?
return null;
}
- params.put(argument, value);
- }
-
- Binding binding = new Binding();
- for (String argument : _arguments) {
- binding.setVariable(argument, params.get(argument));
+ _binding.setVariable(argument, value);
}
- GroovyShell shell = new GroovyShell(binding);
- return shell.evaluate(_script);
+ return _script.run();
}
}
diff --git
a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
index 5035f4b..c8cd285 100644
---
a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
+++
b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/SchemaFieldExtractorUtilsTest.java
@@ -141,20 +141,31 @@ public class SchemaFieldExtractorUtilsTest {
pinotSchema.addField(timeFieldSpec);
Assert.assertFalse(SchemaFieldExtractorUtils.validate(pinotSchema));
+ // incorrect groovy function syntax
+ pinotSchema = new Schema();
+ dimensionFieldSpec = new DimensionFieldSpec("dim1",
FieldSpec.DataType.STRING, true);
+ dimensionFieldSpec.setTransformFunction("Groovy(function, argument3)");
+ pinotSchema.addField(dimensionFieldSpec);
+ Assert.assertFalse(SchemaFieldExtractorUtils.validate(pinotSchema));
+
+ // valid schema, empty arguments
+ pinotSchema = new Schema();
+ dimensionFieldSpec = new DimensionFieldSpec("dim1",
FieldSpec.DataType.STRING, true);
+ dimensionFieldSpec.setTransformFunction("Groovy({function})");
+ pinotSchema.addField(dimensionFieldSpec);
+ Assert.assertTrue(SchemaFieldExtractorUtils.validate(pinotSchema));
+
// valid schema
pinotSchema = new Schema();
dimensionFieldSpec = new DimensionFieldSpec("dim1",
FieldSpec.DataType.STRING, true);
dimensionFieldSpec.setTransformFunction("Groovy({function}, argument1,
argument2, argument3)");
pinotSchema.addField(dimensionFieldSpec);
-
metricFieldSpec = new MetricFieldSpec("m1", FieldSpec.DataType.LONG);
metricFieldSpec.setTransformFunction("Groovy({function}, m2, m3)");
pinotSchema.addField(metricFieldSpec);
-
timeFieldSpec = new TimeFieldSpec("time", FieldSpec.DataType.LONG,
TimeUnit.MILLISECONDS);
timeFieldSpec.setTransformFunction("Groovy({function}, millis)");
pinotSchema.addField(timeFieldSpec);
-
Assert.assertTrue(SchemaFieldExtractorUtils.validate(pinotSchema));
// valid time field spec
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]