This is an automated email from the ASF dual-hosted git repository. volodymyr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 3c798d338f4f4dab6713956e0a94d18e6e5c72bd Author: Charles S. Givre <[email protected]> AuthorDate: Mon Mar 25 13:45:01 2019 -0400 DRILL-7077: Add Function to Facilitate Time Series Analysis closes #1680 --- .../drill/exec/udfs/NearestDateFunctions.java | 147 ++++++++++++++++++++ .../apache/drill/exec/udfs/NearestDateUtils.java | 151 ++++++++++++++++++++ .../drill/exec/udfs/TestNearestDateFunctions.java | 153 +++++++++++++++++++++ 3 files changed, 451 insertions(+) diff --git a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/NearestDateFunctions.java b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/NearestDateFunctions.java new file mode 100644 index 0000000..f61a216 --- /dev/null +++ b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/NearestDateFunctions.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.udfs; + +import org.apache.drill.exec.expr.DrillSimpleFunc; +import org.apache.drill.exec.expr.annotations.FunctionTemplate; +import org.apache.drill.exec.expr.annotations.Output; +import org.apache.drill.exec.expr.annotations.Param; +import org.apache.drill.exec.expr.holders.TimeStampHolder; +import org.apache.drill.exec.expr.holders.VarCharHolder; + +public class NearestDateFunctions { + + /** + * This function takes two arguments, an input date object, and an interval and returns + * the previous date that is the first date in that period. This function is intended to be used in time series analysis to + * aggregate by various units of time. + * Usage is: + * <p> + * SELECT <date_field>, COUNT(*) AS event_count + * FROM ... + * GROUP BY nearestDate(`date_field`, 'QUARTER') + * <p> + * Currently supports the following time units: + * <p> + * YEAR + * QUARTER + * MONTH + * WEEK_SUNDAY + * WEEK_MONDAY + * DAY + * HOUR + * HALF_HOUR + * QUARTER_HOUR + * MINUTE + * 30SECOND + * 15SECOND + * SECOND + */ + @FunctionTemplate(name = "nearestDate", + scope = FunctionTemplate.FunctionScope.SIMPLE, + nulls = FunctionTemplate.NullHandling.NULL_IF_NULL) + public static class NearestDateFunction implements DrillSimpleFunc { + + @Param + TimeStampHolder inputDate; + + @Param + VarCharHolder interval; + + @Output + TimeStampHolder out; + + @Override + public void setup() { + } + + @Override + public void eval() { + + String input = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(interval.start, interval.end, interval.buffer); + java.time.LocalDateTime ld = java.time.LocalDateTime.ofInstant(java.time.Instant.ofEpochMilli(inputDate.value), java.time.ZoneId.of("UTC")); + + java.time.LocalDateTime td = org.apache.drill.exec.udfs.NearestDateUtils.getDate(ld, input); + out.value = td.atZone(java.time.ZoneId.of("UTC")).toInstant().toEpochMilli(); + } + } + + /** + * This function takes three arguments, an input date string, an input date format string, and an interval and returns + * the previous date that is the first date in that period. This function is intended to be used in time series analysis to + * aggregate by various units of time. + * Usage is: + * <p> + * SELECT <date_field>, COUNT(*) AS event_count + * FROM ... + * GROUP BY nearestDate(`date_field`, 'yyyy-mm-dd', 'QUARTER') + * <p> + * Currently supports the following time units: + * <p> + * YEAR + * QUARTER + * MONTH + * WEEK_SUNDAY + * WEEK_MONDAY + * DAY + * HOUR + * HALF_HOUR + * QUARTER_HOUR + * MINUTE + * 30SECOND + * 15SECOND + * SECOND + */ + @FunctionTemplate(name = "nearestDate", + scope = FunctionTemplate.FunctionScope.SIMPLE, + nulls = FunctionTemplate.NullHandling.NULL_IF_NULL) + public static class NearestDateFunctionWithString implements DrillSimpleFunc { + + @Param + VarCharHolder input; + + @Param + VarCharHolder formatString; + + @Param + VarCharHolder interval; + + @Output + TimeStampHolder out; + + @Override + public void setup() { + } + + @Override + public void eval() { + String inputDate = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer); + + String format = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(formatString.start, formatString.end, formatString.buffer); + + String intervalString = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(interval.start, interval.end, interval.buffer); + + java.time.format.DateTimeFormatter formatter = java.time.format.DateTimeFormatter.ofPattern(format); + java.time.LocalDateTime dateTime = java.time.LocalDateTime.parse(inputDate, formatter); + + java.time.LocalDateTime td = org.apache.drill.exec.udfs.NearestDateUtils.getDate(dateTime, intervalString); + out.value = td.atZone(java.time.ZoneId.of("UTC")).toInstant().toEpochMilli(); + } + } +} diff --git a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/NearestDateUtils.java b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/NearestDateUtils.java new file mode 100644 index 0000000..7d66f3b --- /dev/null +++ b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/NearestDateUtils.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.udfs; + +import org.apache.drill.common.exceptions.DrillRuntimeException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.temporal.TemporalAdjusters; +import java.time.LocalDateTime; +import java.time.DayOfWeek; +import java.time.temporal.ChronoUnit; +import java.util.Arrays; + +public class NearestDateUtils { + /** + * Specifies the time grouping to be used with the nearest date function + */ + protected enum TimeInterval { + YEAR, + QUARTER, + MONTH, + WEEK_SUNDAY, + WEEK_MONDAY, + DAY, + HOUR, + HALF_HOUR, + QUARTER_HOUR, + MINUTE, + HALF_MINUTE, + QUARTER_MINUTE, + SECOND + } + + private static final Logger logger = LoggerFactory.getLogger(NearestDateUtils.class); + + /** + * This function takes a Java LocalDateTime object, and an interval string and returns + * the nearest date closets to that time. For instance, if you specified the date as 2018-05-04 and YEAR, the function + * will return 2018-01-01 + * + * @param d the original datetime before adjustments + * @param interval The interval string to deduct from the supplied date + * @return the modified LocalDateTime + */ + public final static java.time.LocalDateTime getDate(java.time.LocalDateTime d, String interval) { + java.time.LocalDateTime newDate = d; + int year = d.getYear(); + int month = d.getMonth().getValue(); + int day = d.getDayOfMonth(); + int hour = d.getHour(); + int minute = d.getMinute(); + int second = d.getSecond(); + TimeInterval adjustmentAmount; + try { + adjustmentAmount = TimeInterval.valueOf(interval.toUpperCase()); + } catch (IllegalArgumentException e) { + throw new DrillRuntimeException(String.format("[%s] is not a valid time statement. Expecting: %s", interval, Arrays.asList(TimeInterval.values()))); + } + switch (adjustmentAmount) { + case YEAR: + newDate = LocalDateTime.of(year, 1, 1, 0, 0, 0); + break; + case QUARTER: + newDate = LocalDateTime.of(year, (month / 3) * 3 + 1, 1, 0, 0, 0); + break; + case MONTH: + newDate = LocalDateTime.of(year, month, 1, 0, 0, 0); + break; + case WEEK_SUNDAY: + newDate = newDate.with(TemporalAdjusters.previousOrSame(DayOfWeek.SUNDAY)) + .truncatedTo(ChronoUnit.DAYS); + break; + case WEEK_MONDAY: + newDate = newDate.with(TemporalAdjusters.previousOrSame(DayOfWeek.MONDAY)) + .truncatedTo(ChronoUnit.DAYS); + break; + case DAY: + newDate = LocalDateTime.of(year, month, day, 0, 0, 0); + break; + case HOUR: + newDate = LocalDateTime.of(year, month, day, hour, 0, 0); + break; + case HALF_HOUR: + if (minute >= 30) { + minute = 30; + } else { + minute = 0; + } + newDate = LocalDateTime.of(year, month, day, hour, minute, 0); + break; + case QUARTER_HOUR: + if (minute >= 45) { + minute = 45; + } else if (minute >= 30) { + minute = 30; + } else if (minute >= 15) { + minute = 15; + } else { + minute = 0; + } + newDate = LocalDateTime.of(year, month, day, hour, minute, 0); + break; + case MINUTE: + newDate = LocalDateTime.of(year, month, day, hour, minute, 0); + break; + case HALF_MINUTE: + if (second >= 30) { + second = 30; + } else { + second = 0; + } + newDate = LocalDateTime.of(year, month, day, hour, minute, second); + break; + case QUARTER_MINUTE: + if (second >= 45) { + second = 45; + } else if (second >= 30) { + second = 30; + } else if (second >= 15) { + second = 15; + } else { + second = 0; + } + newDate = LocalDateTime.of(year, month, day, hour, minute, second); + break; + case SECOND: + newDate = LocalDateTime.of(year, month, day, hour, minute, second); + break; + default: + throw new DrillRuntimeException(String.format("[%s] is not a valid time statement. Expecting: %s", interval, Arrays.asList(TimeInterval.values()))); + } + return newDate; + } +} diff --git a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestNearestDateFunctions.java b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestNearestDateFunctions.java new file mode 100644 index 0000000..490db83 --- /dev/null +++ b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestNearestDateFunctions.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.udfs; + +import org.apache.drill.categories.SqlFunctionTest; +import org.apache.drill.categories.UnlikelyTest; +import org.apache.drill.common.exceptions.DrillRuntimeException; +import org.apache.drill.test.ClusterFixture; +import org.apache.drill.test.ClusterFixtureBuilder; +import org.apache.drill.test.ClusterTest; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import java.util.Arrays; + +import java.time.LocalDateTime; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +@Category({UnlikelyTest.class, SqlFunctionTest.class}) +public class TestNearestDateFunctions extends ClusterTest { + + @BeforeClass + public static void setup() throws Exception { + ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher); + startCluster(builder); + } + + @Test + public void testNearestDate() throws Exception { + String query = "SELECT nearestDate( TO_TIMESTAMP('2019-02-01 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'YEAR') AS nearest_year, " + + "nearestDate( TO_TIMESTAMP('2019-02-01 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'QUARTER') AS nearest_quarter, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'MONTH') AS nearest_month, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'DAY') AS nearest_day, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'WEEK_SUNDAY') AS nearest_week_sunday, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'WEEK_MONDAY') AS nearest_week_monday, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'HOUR') AS nearest_hour, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:42:00', 'yyyy-MM-dd HH:mm:ss'), 'HALF_HOUR') AS nearest_half_hour, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:48:00', 'yyyy-MM-dd HH:mm:ss'), 'QUARTER_HOUR') AS nearest_quarter_hour, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'MINUTE') AS nearest_minute, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:22', 'yyyy-MM-dd HH:mm:ss'), 'HALF_MINUTE') AS nearest_30second, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:22', 'yyyy-MM-dd HH:mm:ss'), 'QUARTER_MINUTE') AS nearest_15second, " + + "nearestDate( TO_TIMESTAMP('2019-02-15 07:22:31', 'yyyy-MM-dd HH:mm:ss'), 'SECOND') AS nearest_second " + + "FROM (VALUES(1))"; + testBuilder() + .sqlQuery(query) + .unOrdered() + .baselineColumns("nearest_year", + "nearest_quarter", + "nearest_month", + "nearest_day", + "nearest_week_sunday", + "nearest_week_monday", + "nearest_hour", + "nearest_half_hour", + "nearest_quarter_hour", + "nearest_minute", + "nearest_30second", + "nearest_15second", + "nearest_second") + .baselineValues(LocalDateTime.of(2019, 1, 1, 0, 0, 0), //Year + LocalDateTime.of(2019, 1, 1, 0, 0, 0), //Quarter + LocalDateTime.of(2019, 2, 1, 0, 0, 0), //Month + LocalDateTime.of(2019, 2, 15, 0, 0, 0), //Day + LocalDateTime.of(2019, 2, 10, 0, 0, 0), //Week Sunday + LocalDateTime.of(2019, 2, 11, 0, 0, 0), //Week Monday + LocalDateTime.of(2019, 2, 15, 7, 0, 0), //Hour + LocalDateTime.of(2019, 2, 15, 7, 30, 0), //Half Hour + LocalDateTime.of(2019, 2, 15, 7, 45, 0), //Quarter Hour + LocalDateTime.of(2019, 2, 15, 7, 22, 0), //Minute + LocalDateTime.of(2019, 2, 15, 7, 22, 0), //30Second + LocalDateTime.of(2019, 2, 15, 7, 22, 15), //15Second + LocalDateTime.of(2019, 2, 15, 7, 22, 31)) //Second + .go(); + } + + @Test + public void testNearestDateWithTimestamp() throws Exception { + String query = "SELECT nearestDate( '2019-02-01 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'YEAR') AS nearest_year, " + + "nearestDate( '2019-02-01 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'QUARTER') AS nearest_quarter, " + + "nearestDate( '2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'MONTH') AS nearest_month, " + + "nearestDate( '2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'DAY') AS nearest_day, " + + "nearestDate( '2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'WEEK_SUNDAY') AS nearest_week_sunday, " + + "nearestDate( '2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'WEEK_MONDAY') AS nearest_week_monday, " + + "nearestDate( '2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'HOUR') AS nearest_hour, " + + "nearestDate( '2019-02-15 07:42:00', 'yyyy-MM-dd HH:mm:ss', 'HALF_HOUR') AS nearest_half_hour, " + + "nearestDate( '2019-02-15 07:48:00', 'yyyy-MM-dd HH:mm:ss', 'QUARTER_HOUR') AS nearest_quarter_hour, " + + "nearestDate( '2019-02-15 07:22:00', 'yyyy-MM-dd HH:mm:ss', 'MINUTE') AS nearest_minute, " + + "nearestDate( '2019-02-15 07:22:22', 'yyyy-MM-dd HH:mm:ss', 'HALF_MINUTE') AS nearest_30second, " + + "nearestDate( '2019-02-15 07:22:22', 'yyyy-MM-dd HH:mm:ss', 'QUARTER_MINUTE') AS nearest_15second, " + + "nearestDate( '2019-02-15 07:22:31', 'yyyy-MM-dd HH:mm:ss', 'SECOND') AS nearest_second " + + "FROM (VALUES(1))"; + testBuilder() + .sqlQuery(query) + .unOrdered() + .baselineColumns("nearest_year", + "nearest_quarter", + "nearest_month", + "nearest_day", + "nearest_week_sunday", + "nearest_week_monday", + "nearest_hour", + "nearest_half_hour", + "nearest_quarter_hour", + "nearest_minute", + "nearest_30second", + "nearest_15second", + "nearest_second") + .baselineValues(LocalDateTime.of(2019, 1, 1, 0, 0, 0), //Year + LocalDateTime.of(2019, 1, 1, 0, 0, 0), //Quarter + LocalDateTime.of(2019, 2, 1, 0, 0, 0), //Month + LocalDateTime.of(2019, 2, 15, 0, 0, 0), //Day + LocalDateTime.of(2019, 2, 10, 0, 0, 0), //Week Sunday + LocalDateTime.of(2019, 2, 11, 0, 0, 0), //Week Monday + LocalDateTime.of(2019, 2, 15, 7, 0, 0), //Hour + LocalDateTime.of(2019, 2, 15, 7, 30, 0), //Half Hour + LocalDateTime.of(2019, 2, 15, 7, 45, 0), //Quarter Hour + LocalDateTime.of(2019, 2, 15, 7, 22, 0), //Minute + LocalDateTime.of(2019, 2, 15, 7, 22, 0), //30Second + LocalDateTime.of(2019, 2, 15, 7, 22, 15), //15Second + LocalDateTime.of(2019, 2, 15, 7, 22, 31)) //Second + .go(); + } + + @Test + public void testReadException() throws Exception { + String query = "SELECT nearestDate( TO_TIMESTAMP('2019-02-01 07:22:00', 'yyyy-MM-dd HH:mm:ss'), 'BAD_DATE') AS nearest_year " + + "FROM (VALUES(1))"; + try { + run(query); + fail(); + } catch (DrillRuntimeException e) { + assertTrue(e.getMessage().contains("[BAD_DATE] is not a valid time statement. Expecting: " + Arrays.asList(NearestDateUtils.TimeInterval.values()))); + } + } +}
