jihaozh commented on a change in pull request #3830: [TE] Presto Connector
backend and front end
URL: https://github.com/apache/incubator-pinot/pull/3830#discussion_r256602419
##########
File path:
thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datasource/presto/SqlUtils.java
##########
@@ -0,0 +1,386 @@
+package org.apache.pinot.thirdeye.datasource.presto;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Predicate;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.Multimap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import javax.annotation.Nullable;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.pinot.thirdeye.common.time.TimeGranularity;
+import org.apache.pinot.thirdeye.common.time.TimeSpec;
+import org.apache.pinot.thirdeye.constant.MetricAggFunction;
+import org.apache.pinot.thirdeye.datalayer.dto.MetricConfigDTO;
+import org.apache.pinot.thirdeye.datasource.MetricFunction;
+import org.apache.pinot.thirdeye.datasource.ThirdEyeRequest;
+import org.apache.pinot.thirdeye.datasource.pinot.PqlUtils;
+import org.apache.pinot.thirdeye.util.ThirdEyeUtils;
+import org.joda.time.DateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+public class SqlUtils {
+
+ // Joiners for assembling multi-part SQL fragments.
+ private static final Joiner AND = Joiner.on(" AND ");
+ private static final Joiner COMMA = Joiner.on(", ");
+
+ // Filter-value prefixes.
+ // NOTE(review): presumably stripped from filter values to pick one of the
+ // OPERATOR_* constants below; the code that uses them is not in this hunk.
+ private static final String PREFIX_NOT_EQUALS = "!";
+ private static final String PREFIX_LESS_THAN = "<";
+ private static final String PREFIX_LESS_THAN_EQUALS = "<=";
+ private static final String PREFIX_GREATER_THAN = ">";
+ private static final String PREFIX_GREATER_THAN_EQUALS = ">=";
+
+ // SQL comparison operators (equality is expressed as IN / NOT IN).
+ private static final String OPERATOR_EQUALS = "IN";
+ private static final String OPERATOR_NOT_EQUALS = "NOT IN";
+ private static final String OPERATOR_LESS_THAN = "<";
+ private static final String OPERATOR_LESS_THAN_EQUALS = "<=";
+ private static final String OPERATOR_GREATER_THAN = ">";
+ private static final String OPERATOR_GREATER_THAN_EQUALS = ">=";
+
+ // Log under this class's own name. The logger was created with
+ // PqlUtils.class, which filed every message from this class under the
+ // Pinot PQL utilities instead.
+ private static final Logger LOGGER = LoggerFactory.getLogger(SqlUtils.class);
+ // Row limit used by getSql when the requested limit is not positive.
+ private static final int DEFAULT_LIMIT = 100000;
+ private static final String PERCENTILE_TDIGEST_PREFIX = "percentileTDigest";
+
+
+ /**
+  * Builds the SQL statement for one metric function of a request.
+  *
+  * <p>Reads the time range, the group-by dimensions, the group-by time
+  * granularity and the row limit from {@code request} and forwards them,
+  * together with the remaining arguments, to the private {@code getSql}
+  * overload.
+  *
+  * @param request supplies start/end time, group-by settings and limit
+  * @param metricFunction metric function to query
+  * @param filterSet dimension filters, forwarded unchanged
+  * @param dataTimeSpec time column specification, forwarded unchanged
+  * @return the SQL text produced by the private overload
+  */
+ public static String getSql(ThirdEyeRequest request,
+     MetricFunction metricFunction, Multimap<String, String> filterSet,
+     TimeSpec dataTimeSpec) {
+   DateTime startInclusive = request.getStartTimeInclusive();
+   DateTime endExclusive = request.getEndTimeExclusive();
+   List<String> groupByDimensions = request.getGroupBy();
+   TimeGranularity bucketSize = request.getGroupByTimeGranularity();
+   int rowLimit = request.getLimit();
+   return getSql(metricFunction, startInclusive, endExclusive, filterSet,
+       groupByDimensions, bucketSize, dataTimeSpec, rowLimit);
+ }
+
+
+ /**
+  * Assembles the SQL statement for a single metric function.
+  *
+  * <p>The statement has the shape
+  * {@code SELECT <selection> FROM <table> WHERE <partition> AND <time range>},
+  * followed by {@code AND <dimension filter>} when that clause is not blank.
+  * The group-by clause and {@code LIMIT} are appended only when the group-by
+  * clause is not blank. A limit that is zero or negative is replaced by
+  * {@code DEFAULT_LIMIT}.
+  */
+ private static String getSql(MetricFunction metricFunction,
+     DateTime startTime, DateTime endTimeExclusive,
+     Multimap<String, String> filterSet, List<String> groupBy,
+     TimeGranularity timeGranularity, TimeSpec dataTimeSpec, int limit) {
+   MetricConfigDTO config =
+       ThirdEyeUtils.getMetricConfigFromId(metricFunction.getMetricId());
+   String datasetName = metricFunction.getDataset();
+
+   // Clause helpers are invoked in the same order as before.
+   String select = getSelectionClause(config, metricFunction, groupBy,
+       timeGranularity, dataTimeSpec);
+   String table = ThirdEyeUtils.computePrestoTableName(datasetName);
+   String timeRange = getBetweenClause(startTime, endTimeExclusive,
+       dataTimeSpec, datasetName);
+   String partition = getDatePartitionClause(startTime);
+
+   StringBuilder sql = new StringBuilder()
+       .append("SELECT ").append(select)
+       .append(" FROM ").append(table)
+       .append(" WHERE ").append(partition)
+       .append(" AND ").append(timeRange);
+
+   String dimensionFilter = getDimensionWhereClause(filterSet);
+   if (StringUtils.isNotBlank(dimensionFilter)) {
+     sql.append(" AND ").append(dimensionFilter);
+   }
+
+   int effectiveLimit = limit > 0 ? limit : DEFAULT_LIMIT;
+
+   String groupByClause = getDimensionGroupByClause(groupBy,
+       timeGranularity, dataTimeSpec);
+   if (StringUtils.isNotBlank(groupByClause)) {
+     sql.append(" ").append(groupByClause)
+         .append(" LIMIT ").append(effectiveLimit);
+   }
+
+   return sql.toString();
+ }
+
+ public static String getMaxDataTimeSQL(String timeColumn, String tableName) {
+ return "SELECT MAX(" + timeColumn + ") FROM " + tableName + " WHERE
datepartition >= daysago(1)";
+ }
+
+ public static String getDimensionFiltersSQL(String dimension, String
tableName) {
+ return "SELECT DISTINCT(" + dimension + ") FROM " + tableName + " WHERE
datepartition >= daysago(1)";
+ }
+
+ /**
+  * Builds the column list that follows {@code SELECT}.
+  *
+  * <p>The list contains, in order: the time column (only when
+  * {@code granularity} is non-null), every group-by key, and finally the
+  * aggregate expression {@code <function>(<metric>)}.
+  *
+  * @param metricConfig supplies the metric name unless the function's
+  *     metric name is {@code "*"}
+  * @param metricFunction supplies the aggregation function name and the
+  *     metric name that is checked against {@code "*"}
+  * @param groupByKeys dimension columns to select, in iteration order
+  * @param granularity when non-null, the time column is selected first
+  * @param dateTimeSpec supplies the time column name
+  * @return the comma-separated selection clause
+  */
+ private static String getSelectionClause(MetricConfigDTO metricConfig,
+     MetricFunction metricFunction, List<String> groupByKeys,
+     TimeGranularity granularity, TimeSpec dateTimeSpec) {
+   StringBuilder builder = new StringBuilder();
+
+   if (granularity != null) {
+     // The epoch and formatted-time cases both select the raw time column.
+     // The former if/else had two identical branches, so one append covers
+     // both and the time format no longer needs to be read here.
+     builder.append(dateTimeSpec.getColumnName()).append(", ");
+   }
+
+   for (String groupByKey : groupByKeys) {
+     builder.append(groupByKey).append(", ");
+   }
+
+   // "*" is passed through as-is; any other metric uses its configured name.
+   String metricName = metricFunction.getMetricName().equals("*")
+       ? "*"
+       : metricConfig.getName();
+
+   builder.append(convertAggFunction(metricFunction.getFunctionName()))
+       .append("(").append(metricName).append(")");
+
+   return builder.toString();
+ }
+
+ static String getBetweenClause(DateTime start, DateTime endExclusive,
TimeSpec timeFieldSpec, String dataset) {
+ TimeGranularity dataGranularity = timeFieldSpec.getDataGranularity();
+ long dataGranularityMillis = dataGranularity.toMillis();
+
+ String timeField = timeFieldSpec.getColumnName();
+ String timeFormat = timeFieldSpec.getFormat();
+
+ // epoch case
+ if (timeFormat == null || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
+ long startUnits = (long) Math.ceil(start.getMillis() / (double)
dataGranularityMillis);
+ long endUnits = (long) Math.ceil(endExclusive.getMillis() / (double)
dataGranularityMillis);
+
+ // point query
+ if (startUnits == endUnits) {
+ return String.format(" %s = %d", timeField, startUnits);
+ }
+
+ return String.format(" %s BETWEEN %d AND %d", timeField, startUnits,
endUnits);
+ }
+
+ // NOTE:
+ // this is crazy. epoch rounds up, but timeFormat down
+ // we maintain this behavior for backward compatibility.
+ long startUnits = (long) Math.ceil(start.getMillis()) / 1000;
+ long endUnits = (long) Math.ceil(endExclusive.getMillis()) / 1000;
+
+ // point query
Review comment:
same as above
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]