This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 8474baee80 Optimize dateTimeConvert scalar function to only parse the
format once (#8939)
8474baee80 is described below
commit 8474baee80831fccf14d3218c26cd68db7a1bc7d
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Mon Jun 20 22:51:34 2022 -0700
Optimize dateTimeConvert scalar function to only parse the format once
(#8939)
Make `dateTimeConvert` scalar function stateful so that the format is
parsed only once across multiple invocations. The state is persisted within the
scope of `FunctionInvoker`, so multiple functions with the same name won't
share the same state.
---
.../common/function/scalar/DateTimeConvert.java | 80 ++++++++++++++++++++++
.../common/function/scalar/DateTimeFunctions.java | 48 -------------
.../core/data/function/DateTimeFunctionsTest.java | 34 ++++++++-
3 files changed, 111 insertions(+), 51 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeConvert.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeConvert.java
new file mode 100644
index 0000000000..024b92030f
--- /dev/null
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeConvert.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function.scalar;
+
+import org.apache.pinot.spi.annotations.ScalarFunction;
+import org.apache.pinot.spi.data.DateTimeFieldSpec;
+import org.apache.pinot.spi.data.DateTimeFormatSpec;
+import org.apache.pinot.spi.data.DateTimeGranularitySpec;
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormatter;
+
+
+/**
+ * Equivalent to {@code DateTimeConversionTransformFunction}.
+ */
+public class DateTimeConvert {
+ private DateTimeFormatSpec _inputFormatSpec;
+ private DateTimeFormatSpec _outputFormatSpec;
+ private DateTimeGranularitySpec _granularitySpec;
+
+ @ScalarFunction
+ public String dateTimeConvert(String timeValueStr, String inputFormatStr,
String outputFormatStr,
+ String outputGranularityStr) {
+ if (_inputFormatSpec == null) {
+ _inputFormatSpec = new DateTimeFormatSpec(inputFormatStr);
+ _outputFormatSpec = new DateTimeFormatSpec(outputFormatStr);
+ _granularitySpec = new DateTimeGranularitySpec(outputGranularityStr);
+ }
+
+ long timeValueMs = _inputFormatSpec.fromFormatToMillis(timeValueStr);
+ if (_outputFormatSpec.getTimeFormat() ==
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT) {
+ DateTimeFormatter outputFormatter =
_outputFormatSpec.getDateTimeFormatter();
+ DateTime dateTime = new DateTime(timeValueMs, outputFormatter.getZone());
+ int size = _granularitySpec.getSize();
+ switch (_granularitySpec.getTimeUnit()) {
+ case MILLISECONDS:
+ dateTime = dateTime.withMillisOfSecond((dateTime.getMillisOfSecond()
/ size) * size);
+ break;
+ case SECONDS:
+ dateTime = dateTime.withSecondOfMinute((dateTime.getSecondOfMinute()
/ size) * size).secondOfMinute()
+ .roundFloorCopy();
+ break;
+ case MINUTES:
+ dateTime =
+ dateTime.withMinuteOfHour((dateTime.getMinuteOfHour() / size) *
size).minuteOfHour().roundFloorCopy();
+ break;
+ case HOURS:
+ dateTime = dateTime.withHourOfDay((dateTime.getHourOfDay() / size) *
size).hourOfDay().roundFloorCopy();
+ break;
+ case DAYS:
+ dateTime =
+ dateTime.withDayOfMonth(((dateTime.getDayOfMonth() - 1) / size)
* size + 1).dayOfMonth().roundFloorCopy();
+ break;
+ default:
+ break;
+ }
+ return outputFormatter.print(dateTime);
+ } else {
+ long granularityMs = _granularitySpec.granularityToMillis();
+ long roundedTimeValueMs = timeValueMs / granularityMs * granularityMs;
+ return _outputFormatSpec.fromMillisToFormat(roundedTimeValueMs);
+ }
+ }
+}
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java
index 0c98dcf20c..f350288602 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/DateTimeFunctions.java
@@ -25,13 +25,9 @@ import
org.apache.pinot.common.function.DateTimePatternHandler;
import org.apache.pinot.common.function.DateTimeUtils;
import org.apache.pinot.common.function.TimeZoneKey;
import org.apache.pinot.spi.annotations.ScalarFunction;
-import org.apache.pinot.spi.data.DateTimeFieldSpec;
-import org.apache.pinot.spi.data.DateTimeFormatSpec;
-import org.apache.pinot.spi.data.DateTimeGranularitySpec;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.chrono.ISOChronology;
-import org.joda.time.format.DateTimeFormatter;
/**
* Inbuilt date time related transform functions
@@ -680,50 +676,6 @@ public class DateTimeFunctions {
.roundFloor(TimeUnit.MILLISECONDS.convert(timeValue, inputTimeUnit)),
TimeUnit.MILLISECONDS);
}
- /**
- * Equivalent to {@code DateTimeConversionTransformFunction}. Both input and
output are string type to support simple
- * date format.
- */
- @ScalarFunction
- public static String dateTimeConvert(String timeValueStr, String
inputFormatStr, String outputFormatStr,
- String outputGranularityStr) {
- long timeValueMs = new
DateTimeFormatSpec(inputFormatStr).fromFormatToMillis(timeValueStr);
- DateTimeFormatSpec outputFormat = new DateTimeFormatSpec(outputFormatStr);
- DateTimeGranularitySpec granularitySpec = new
DateTimeGranularitySpec(outputGranularityStr);
- if (outputFormat.getTimeFormat() ==
DateTimeFieldSpec.TimeFormat.SIMPLE_DATE_FORMAT) {
- DateTimeFormatter outputFormatter = outputFormat.getDateTimeFormatter();
- DateTime dateTime = new DateTime(timeValueMs, outputFormatter.getZone());
- int size = granularitySpec.getSize();
- switch (granularitySpec.getTimeUnit()) {
- case MILLISECONDS:
- dateTime = dateTime.withMillisOfSecond((dateTime.getMillisOfSecond()
/ size) * size);
- break;
- case SECONDS:
- dateTime = dateTime.withSecondOfMinute((dateTime.getSecondOfMinute()
/ size) * size).secondOfMinute()
- .roundFloorCopy();
- break;
- case MINUTES:
- dateTime =
- dateTime.withMinuteOfHour((dateTime.getMinuteOfHour() / size) *
size).minuteOfHour().roundFloorCopy();
- break;
- case HOURS:
- dateTime = dateTime.withHourOfDay((dateTime.getHourOfDay() / size) *
size).hourOfDay().roundFloorCopy();
- break;
- case DAYS:
- dateTime = dateTime.withDayOfMonth(((dateTime.getDayOfMonth() - 1) /
size) * size + 1).dayOfMonth()
- .roundFloorCopy();
- break;
- default:
- break;
- }
- return outputFormatter.print(dateTime);
- } else {
- long granularityMs = granularitySpec.granularityToMillis();
- long roundedTimeValueMs = timeValueMs / granularityMs * granularityMs;
- return outputFormat.fromMillisToFormat(roundedTimeValueMs);
- }
- }
-
/**
* Add a time period to the provided timestamp.
* e.g. timestampAdd('days', 10, NOW()) will add 10 days to the current
timestamp and return the value
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
index e85bec5d38..b746b570c2 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/data/function/DateTimeFunctionsTest.java
@@ -30,10 +30,11 @@ import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
-import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import static org.testng.Assert.assertEquals;
+
/**
* Tests the Pinot inbuilt transform functions
@@ -47,8 +48,8 @@ public class DateTimeFunctionsTest {
private void testFunction(String functionExpression, List<String>
expectedArguments, GenericRow row,
Object expectedResult) {
InbuiltFunctionEvaluator evaluator = new
InbuiltFunctionEvaluator(functionExpression);
- Assert.assertEquals(evaluator.getArguments(), expectedArguments);
- Assert.assertEquals(evaluator.evaluate(row), expectedResult);
+ assertEquals(evaluator.getArguments(), expectedArguments);
+ assertEquals(evaluator.evaluate(row), expectedResult);
}
@Test(dataProvider = "dateTimeFunctionsDataProvider")
@@ -567,4 +568,31 @@ public class DateTimeFunctionsTest {
testFunction(String.format("dateTimeConvert(timeCol, '%s', '%s', '%s')",
inputFormatStr, outputFormatStr,
outputGranularityStr), arguments, row, expectedResult == null ? null :
expectedResult.toString());
}
+
+ private void testMultipleInvocations(String functionExpression,
List<GenericRow> rows, List<Object> expectedResults) {
+ InbuiltFunctionEvaluator evaluator = new
InbuiltFunctionEvaluator(functionExpression);
+ int numInvocations = rows.size();
+ assertEquals(expectedResults.size(), numInvocations);
+ for (int i = 0; i < numInvocations; i++) {
+ assertEquals(evaluator.evaluate(rows.get(i)),
expectedResults.get(i).toString());
+ }
+ }
+
+ @Test
+ public void testDateTimeConvertMultipleInvocations() {
+ String inputFormatStr = "SIMPLE_DATE_FORMAT|yyyyMMdd";
+ String outputFormatStr = "EPOCH|MILLISECONDS";
+ String outputGranularityStr = "1:DAYS";
+
+ List<GenericRow> rows = new ArrayList<>(10);
+ List<Object> expectedResults = new ArrayList<>(10);
+ for (int i = 0; i < 10; i++) {
+ GenericRow row = new GenericRow();
+ row.putValue("timeCol", 20200101 + i);
+ rows.add(row);
+ expectedResults.add(1577836800000L + 24 * 3600000 * i);
+ }
+ testMultipleInvocations(String.format("dateTimeConvert(timeCol, '%s',
'%s', '%s')", inputFormatStr, outputFormatStr,
+ outputGranularityStr), rows, expectedResults);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]