APACHE-KYLIN-2725: Introduce a tool for creating system cubes relating to query & job metrics
Signed-off-by: lidongsjtu <lid...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/a0c9795f Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/a0c9795f Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/a0c9795f Branch: refs/heads/master Commit: a0c9795fb4dcedbe3f85e13b8aac160dbbe7de7c Parents: 29b9128 Author: Zhong <nju_y...@apache.org> Authored: Sun Aug 13 20:24:19 2017 +0800 Committer: lidongsjtu <lid...@apache.org> Committed: Thu Nov 2 17:36:02 2017 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/KylinConfigBase.java | 4 + .../kylin/metadata/model/FunctionDesc.java | 8 + .../kylin/metadata/model/ParameterDesc.java | 6 +- .../kylin/metadata/model/PartitionDesc.java | 2 +- .../metrics/lib/ActiveReservoirReporter.java | 3 +- .../org/apache/kylin/metrics/lib/SinkTool.java | 32 + .../kylin/metrics/property/JobPropertyEnum.java | 2 +- .../metrics/property/QueryPropertyEnum.java | 2 +- tool/pom.xml | 5 + .../metrics/systemcube/CubeDescCreator.java | 673 +++++++++++++++++++ .../metrics/systemcube/CubeInstanceCreator.java | 88 +++ .../metrics/systemcube/HiveTableCreator.java | 278 ++++++++ .../metrics/systemcube/KylinTableCreator.java | 114 ++++ .../tool/metrics/systemcube/ModelCreator.java | 267 ++++++++ .../tool/metrics/systemcube/ProjectCreator.java | 101 +++ .../tool/metrics/systemcube/SCCreator.java | 262 ++++++++ .../metrics/systemcube/util/HiveSinkTool.java | 61 ++ tool/src/main/resources/SCSinkTools.json | 14 + .../tool/metrics/systemcube/SCCreatorTest.java | 89 +++ tool/src/test/resources/SCSinkTools.json | 1 + 20 files changed, 2007 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java 
---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 9a28240..3d67ee3 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -1340,6 +1340,10 @@ abstract public class KylinConfigBase implements Serializable { return Boolean.parseBoolean(getOptional("kylin.core.metrics.reporter-job-enabled", "false")); } + public String getKylinMetricsPrefix() { + return getOptional("kylin.core.metrics.prefix", "KYLIN").toUpperCase(); + } + public String getKylinMetricsActiveReservoirDefaultClass() { return getOptional("kylin.core.metrics.active-reservoir-default-class", "org.apache.kylin.metrics.lib.impl.StubReservoir"); http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java index ce0b4c6..d8b33c0 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/FunctionDesc.java @@ -235,6 +235,14 @@ public class FunctionDesc implements Serializable { return parameter; } + public void setParameter(ParameterDesc parameter) { + this.parameter = parameter; + } + + public void setExpression(String expression) { + this.expression = expression; + } + public int getParameterCount() { int count = 0; for (ParameterDesc p = parameter; p != null; p = p.getNextParameter()) { 
http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java index 930dc02..f757503 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/ParameterDesc.java @@ -85,6 +85,10 @@ public class ParameterDesc implements Serializable { return type; } + public void setType(String type) { + this.type = type; + } + public byte[] getBytes() throws UnsupportedEncodingException { return value.getBytes("UTF-8"); } @@ -93,7 +97,7 @@ public class ParameterDesc implements Serializable { return value; } - void setValue(String value) { + public void setValue(String value) { this.value = value; } http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java b/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java index dcbbfd2..9d56dbb 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/model/PartitionDesc.java @@ -117,7 +117,7 @@ public class PartitionDesc implements Serializable { } // for test - void setPartitionTimeColumn(String partitionTimeColumn) { + public void setPartitionTimeColumn(String partitionTimeColumn) { this.partitionTimeColumn = partitionTimeColumn; } http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-metrics/src/main/java/org/apache/kylin/metrics/lib/ActiveReservoirReporter.java 
---------------------------------------------------------------------- diff --git a/core-metrics/src/main/java/org/apache/kylin/metrics/lib/ActiveReservoirReporter.java b/core-metrics/src/main/java/org/apache/kylin/metrics/lib/ActiveReservoirReporter.java index 6020865..463aa92 100644 --- a/core-metrics/src/main/java/org/apache/kylin/metrics/lib/ActiveReservoirReporter.java +++ b/core-metrics/src/main/java/org/apache/kylin/metrics/lib/ActiveReservoirReporter.java @@ -21,13 +21,14 @@ package org.apache.kylin.metrics.lib; import java.io.Closeable; import java.util.regex.Pattern; +import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Pair; import com.google.common.base.Strings; public abstract class ActiveReservoirReporter implements Closeable { - public static final String KYLIN_PREFIX = "KYLIN"; + public static final String KYLIN_PREFIX = KylinConfig.getInstanceFromEnv().getKylinMetricsPrefix(); public static Pair<String, String> getTableNameSplits(String tableName) { if (Strings.isNullOrEmpty(tableName)) { http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java ---------------------------------------------------------------------- diff --git a/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java b/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java new file mode 100644 index 0000000..b55516a --- /dev/null +++ b/core-metrics/src/main/java/org/apache/kylin/metrics/lib/SinkTool.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.metrics.lib; + +import java.io.Serializable; +import java.util.Map; + +public interface SinkTool extends Serializable { + int getStorageType(); + + int getSourceType(); + + String getTableNameForMetrics(String subject); + + Map<String, String> getCubeDescOverrideProperties(); +} http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java ---------------------------------------------------------------------- diff --git a/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java b/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java index bbe987a..64d13ac 100644 --- a/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java +++ b/core-metrics/src/main/java/org/apache/kylin/metrics/property/JobPropertyEnum.java @@ -21,7 +21,7 @@ package org.apache.kylin.metrics.property; import com.google.common.base.Strings; public enum JobPropertyEnum { - ID_CODE("JOB_ID"), USER("USER"), PROJECT("PROJECT"), CUBE("CUBE_NAME"), TYPE("JOB_TYPE"), ALGORITHM( + ID_CODE("JOB_ID"), USER("KUSER"), PROJECT("PROJECT"), CUBE("CUBE_NAME"), TYPE("JOB_TYPE"), ALGORITHM( "CUBING_TYPE"), STATUS("JOB_STATUS"), EXCEPTION("EXCEPTION"), // SOURCE_SIZE("TABLE_SIZE"), CUBE_SIZE("CUBE_SIZE"), BUILD_DURATION("DURATION"), WAIT_RESOURCE_TIME( "WAIT_RESOURCE_TIME"), PER_BYTES_TIME_COST("PER_BYTES_TIME_COST"), STEP_DURATION_DISTINCT_COLUMNS( 
http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java ---------------------------------------------------------------------- diff --git a/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java b/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java index 6fe5b0f..3f016b0 100644 --- a/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java +++ b/core-metrics/src/main/java/org/apache/kylin/metrics/property/QueryPropertyEnum.java @@ -21,7 +21,7 @@ package org.apache.kylin.metrics.property; import com.google.common.base.Strings; public enum QueryPropertyEnum { - ID_CODE("QUERY_HASH_CODE"), TYPE("QUERY_TYPE"), USER("USER"), PROJECT("PROJECT"), REALIZATION( + ID_CODE("QUERY_HASH_CODE"), TYPE("QUERY_TYPE"), USER("KUSER"), PROJECT("PROJECT"), REALIZATION( "REALIZATION"), REALIZATION_TYPE("REALIZATION_TYPE"), EXCEPTION("EXCEPTION"), // TIME_COST("QUERY_TIME_COST"), CALCITE_RETURN_COUNT("CALCITE_COUNT_RETURN"), STORAGE_RETURN_COUNT( "STORAGE_COUNT_RETURN"), AGGR_FILTER_COUNT("CALCITE_COUNT_AGGREGATE_FILTER"); http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/tool/pom.xml ---------------------------------------------------------------------- diff --git a/tool/pom.xml b/tool/pom.xml index 124f25e..7d4d29a 100644 --- a/tool/pom.xml +++ b/tool/pom.xml @@ -39,6 +39,11 @@ <dependencies> <dependency> <groupId>org.apache.kylin</groupId> + <artifactId>kylin-core-metrics</artifactId> + </dependency> + + <dependency> + <groupId>org.apache.kylin</groupId> <artifactId>kylin-storage-hbase</artifactId> </dependency> <dependency> http://git-wip-us.apache.org/repos/asf/kylin/blob/a0c9795f/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java ---------------------------------------------------------------------- diff --git 
a/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java new file mode 100644 index 0000000..2be381c --- /dev/null +++ b/tool/src/main/java/org/apache/kylin/tool/metrics/systemcube/CubeDescCreator.java @@ -0,0 +1,673 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.kylin.tool.metrics.systemcube; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.common.util.Pair; +import org.apache.kylin.cube.CubeDescManager; +import org.apache.kylin.cube.model.AggregationGroup; +import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.cube.model.DimensionDesc; +import org.apache.kylin.cube.model.HBaseColumnDesc; +import org.apache.kylin.cube.model.HBaseColumnFamilyDesc; +import org.apache.kylin.cube.model.HBaseMappingDesc; +import org.apache.kylin.cube.model.RowKeyColDesc; +import org.apache.kylin.cube.model.RowKeyDesc; +import org.apache.kylin.cube.model.SelectRule; +import org.apache.kylin.dimension.DictionaryDimEnc; +import org.apache.kylin.job.constant.JobStatusEnum; +import org.apache.kylin.measure.percentile.PercentileMeasureType; +import org.apache.kylin.metadata.model.FunctionDesc; +import org.apache.kylin.metadata.model.IEngineAware; +import org.apache.kylin.metadata.model.MeasureDesc; +import org.apache.kylin.metadata.model.ParameterDesc; +import org.apache.kylin.metrics.lib.SinkTool; +import org.apache.kylin.metrics.lib.impl.RecordEvent; +import org.apache.kylin.metrics.lib.impl.TimePropertyEnum; +import org.apache.kylin.metrics.property.JobPropertyEnum; +import org.apache.kylin.metrics.property.QueryCubePropertyEnum; +import org.apache.kylin.metrics.property.QueryPropertyEnum; +import org.apache.kylin.metrics.property.QueryRPCPropertyEnum; +import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +public class CubeDescCreator { + + public static void main(String[] args) throws Exception { + // KylinConfig.setSandboxEnvIfPossible(); + KylinConfig config = KylinConfig.getInstanceFromEnv(); + + 
CubeDesc kylinCubeDesc = generateKylinCubeDescForMetricsQuery(config, new HiveSinkTool()); + ByteArrayOutputStream buf = new ByteArrayOutputStream(); + DataOutputStream dout = new DataOutputStream(buf); + CubeDescManager.CUBE_DESC_SERIALIZER.serialize(kylinCubeDesc, dout); + dout.close(); + buf.close(); + System.out.println(buf.toString()); + } + + public static CubeDesc generateKylinCubeDescForMetricsQuery(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQuery()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsQuery(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsQuery(); + measures.remove(QueryPropertyEnum.ID_CODE.toString()); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(measures.size() * 2 + 1 + 1); + + List<Pair<String, String>> measureTypeList = HiveTableCreator.getHiveColumnsForMetricsQuery(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + measureDescList.add(getMeasureMin(QueryPropertyEnum.TIME_COST.toString(), + measureTypeMap.get(QueryPropertyEnum.TIME_COST.toString()))); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + } + measureDescList.add(getMeasureHLL(QueryPropertyEnum.ID_CODE.toString())); 
+ measureDescList.add(getMeasurePercentile(QueryPropertyEnum.TIME_COST.toString())); + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(tableName, rowKeyColDescs); + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryPropertyEnum.USER.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryPropertyEnum.PROJECT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryPropertyEnum.REALIZATION.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryPropertyEnum.REALIZATION_TYPE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryPropertyEnum.EXCEPTION.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryPropertyEnum.TYPE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, RecordEvent.RecordReserveKeyEnum.HOST.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[][] hierarchy_dims = new String[2][]; + hierarchy_dims[0] = getTimeHierarchy(); + hierarchy_dims[1] = new String[2]; + hierarchy_dims[1][0] = QueryPropertyEnum.REALIZATION_TYPE.toString(); + hierarchy_dims[1][1] = QueryPropertyEnum.REALIZATION.toString(); + for (int i = 0; i < hierarchy_dims.length; i++) { + hierarchy_dims[i] = refineColumnWithTable(tableName, hierarchy_dims[i]); + } + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = new String[0]; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = new String[0][0]; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(refineColumnWithTable(tableName, dimensions)); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + 
hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsQueryCube(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryCube()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsQueryCube(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + dimensions.remove(QueryCubePropertyEnum.PROJECT.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsQueryCube(); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(measures.size() * 2); + + List<Pair<String, String>> measureTypeList = HiveTableCreator.getHiveColumnsForMetricsQueryCube(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + if (!measure.equals(QueryCubePropertyEnum.WEIGHT_PER_HIT.toString())) { + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + } + } + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = 
getTimeRowKeyColDesc(tableName, rowKeyColDescs); + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryCubePropertyEnum.CUBE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryCubePropertyEnum.SEGMENT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryCubePropertyEnum.CUBOID_SOURCE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryCubePropertyEnum.CUBOID_TARGET.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryCubePropertyEnum.FILTER_MASK.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryCubePropertyEnum.IF_MATCH.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryCubePropertyEnum.IF_SUCCESS.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[] mandatory_dims = new String[] { QueryCubePropertyEnum.CUBE.toString() }; + mandatory_dims = refineColumnWithTable(tableName, mandatory_dims); + + String[][] hierarchy_dims = new String[1][]; + hierarchy_dims[0] = getTimeHierarchy(); + for (int i = 0; i < hierarchy_dims.length; i++) { + hierarchy_dims[i] = refineColumnWithTable(tableName, hierarchy_dims[i]); + } + + String[][] joint_dims = new String[1][]; + joint_dims[0] = new String[] { QueryCubePropertyEnum.CUBOID_SOURCE.toString(), + QueryCubePropertyEnum.CUBOID_TARGET.toString() }; + for (int i = 0; i < joint_dims.length; i++) { + joint_dims[i] = refineColumnWithTable(tableName, joint_dims[i]); + } + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = mandatory_dims; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = joint_dims; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(refineColumnWithTable(tableName, dimensions)); + aggGroup.setSelectRule(selectRule); + + 
//Set for hbase mapping + HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsQueryRPC(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryRpcCall()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsQueryRPC(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsQueryRPC(); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(measures.size() * 2 + 1 + 1); + + List<Pair<String, String>> measureTypeList = HiveTableCreator.getHiveColumnsForMetricsQueryRPC(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + } + measureDescList.add(getMeasurePercentile(QueryRPCPropertyEnum.CALL_TIME.toString())); + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(tableName, rowKeyColDescs); + 
rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryRPCPropertyEnum.PROJECT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryRPCPropertyEnum.REALIZATION.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryRPCPropertyEnum.RPC_SERVER.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, RecordEvent.RecordReserveKeyEnum.HOST.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, QueryRPCPropertyEnum.EXCEPTION.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[][] hierarchy_dims = new String[1][]; + hierarchy_dims[0] = getTimeHierarchy(); + for (int i = 0; i < hierarchy_dims.length; i++) { + hierarchy_dims[i] = refineColumnWithTable(tableName, hierarchy_dims[i]); + } + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = new String[0]; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = new String[0][0]; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(refineColumnWithTable(tableName, dimensions)); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsJob(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJob()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsJob(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + 
dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + + List<DimensionDesc> dimensionDescList = Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsJob(); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize((measures.size() - 4) * 3 + 1 + 1 + 4); + + Set<String> stepDuration = Sets.newHashSet(); + stepDuration.add(JobPropertyEnum.STEP_DURATION_DISTINCT_COLUMNS.toString()); + stepDuration.add(JobPropertyEnum.STEP_DURATION_DICTIONARY.toString()); + stepDuration.add(JobPropertyEnum.STEP_DURATION_INMEM_CUBING.toString()); + stepDuration.add(JobPropertyEnum.STEP_DURATION_HFILE_CONVERT.toString()); + + List<Pair<String, String>> measureTypeList = HiveTableCreator.getHiveColumnsForMetricsJob(); + Map<String, String> measureTypeMap = Maps.newHashMapWithExpectedSize(measureTypeList.size()); + for (Pair<String, String> entry : measureTypeList) { + measureTypeMap.put(entry.getKey(), entry.getValue()); + } + measureDescList.add(getMeasureCount()); + for (String measure : measures) { + measureDescList.add(getMeasureSum(measure, measureTypeMap.get(measure))); + measureDescList.add(getMeasureMax(measure, measureTypeMap.get(measure))); + if (!stepDuration.contains(measure)) { + measureDescList.add(getMeasureMin(measure, measureTypeMap.get(measure))); + } + } + measureDescList.add(getMeasurePercentile(JobPropertyEnum.BUILD_DURATION.toString())); + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(tableName, rowKeyColDescs); + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.USER.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, 
JobPropertyEnum.PROJECT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.CUBE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.ALGORITHM.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.TYPE.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[][] hierarchy_dims = new String[1][]; + hierarchy_dims[0] = getTimeHierarchy(); + for (int i = 0; i < hierarchy_dims.length; i++) { + hierarchy_dims[i] = refineColumnWithTable(tableName, hierarchy_dims[i]); + } + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = new String[0]; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = new String[0][0]; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(refineColumnWithTable(tableName, dimensions)); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + HBaseMappingDesc hBaseMapping = new HBaseMappingDesc(); + hBaseMapping.setColumnFamily(getHBaseColumnFamily(measureDescList)); + + return generateKylinCubeDesc(tableName, sinkTool.getStorageType(), dimensionDescList, measureDescList, + rowKeyDesc, aggGroup, hBaseMapping, sinkTool.getCubeDescOverrideProperties()); + } + + public static CubeDesc generateKylinCubeDescForMetricsJobException(KylinConfig config, SinkTool sinkTool) { + String tableName = sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJobException()); + + //Set for dimensions + List<String> dimensions = ModelCreator.getDimensionsForMetricsJobException(); + dimensions.remove(TimePropertyEnum.DAY_TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.TIME.toString()); + dimensions.remove(RecordEvent.RecordReserveKeyEnum.HOST.toString()); + + List<DimensionDesc> dimensionDescList = 
Lists.newArrayListWithExpectedSize(dimensions.size()); + for (String dimensionName : dimensions) { + dimensionDescList.add(getDimensionDesc(tableName, dimensionName)); + } + + //Set for measures + List<String> measures = ModelCreator.getMeasuresForMetricsJobException(); + measures.remove(JobPropertyEnum.ID_CODE.toString()); + List<MeasureDesc> measureDescList = Lists.newArrayListWithExpectedSize(1); + + measureDescList.add(getMeasureCount()); + + //Set for row key + RowKeyColDesc[] rowKeyColDescs = new RowKeyColDesc[dimensionDescList.size()]; + int idx = getTimeRowKeyColDesc(tableName, rowKeyColDescs); + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.USER.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.PROJECT.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.CUBE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.ALGORITHM.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.TYPE.toString(), idx + 1); + idx++; + rowKeyColDescs[idx] = getRowKeyColDesc(tableName, JobPropertyEnum.EXCEPTION.toString(), idx + 1); + idx++; + + RowKeyDesc rowKeyDesc = new RowKeyDesc(); + rowKeyDesc.setRowkeyColumns(rowKeyColDescs); + + //Set for aggregation group + String[][] hierarchy_dims = new String[1][]; + hierarchy_dims[0] = getTimeHierarchy(); + for (int i = 0; i < hierarchy_dims.length; i++) { + hierarchy_dims[i] = refineColumnWithTable(tableName, hierarchy_dims[i]); + } + + SelectRule selectRule = new SelectRule(); + selectRule.mandatoryDims = new String[0]; + selectRule.hierarchyDims = hierarchy_dims; + selectRule.jointDims = new String[0][0]; + + AggregationGroup aggGroup = new AggregationGroup(); + aggGroup.setIncludes(refineColumnWithTable(tableName, dimensions)); + aggGroup.setSelectRule(selectRule); + + //Set for hbase mapping + HBaseMappingDesc hBaseMapping = new 
    // NOTE(review): this chunk is the interior of CubeDescCreator. The method whose
    // tail preceded this point (building the job-exception cube desc) starts before
    // this chunk and is not reproduced here.

    /**
     * Assembles a complete {@link CubeDesc} for a system (metrics) cube.
     * <p>
     * Cube name and model name are both derived from the Hive table name by
     * replacing '.' with '_'. The desc is created with a fixed lastModified of 0,
     * MR_V2 engine, the caller-supplied storage type, a single aggregation group,
     * and auto-merge ranges of 1 day / 7 days / 28 days (in milliseconds).
     * Error-status notification is enabled; the notify list starts empty.
     *
     * @param tableName          fully qualified hive table name ("db.table")
     * @param storageType        storage type id taken from the sink tool
     * @param dimensionDescList  cube dimensions
     * @param measureDescList    cube measures
     * @param rowKeyDesc         row key layout
     * @param aggGroup           the single aggregation group for this cube
     * @param hBaseMapping       measure-to-column-family mapping
     * @param overrideProperties per-cube kylin property overrides from the sink tool
     * @return a fully initialized CubeDesc with a fresh random UUID and signature
     */
    public static CubeDesc generateKylinCubeDesc(String tableName, int storageType,
            List<DimensionDesc> dimensionDescList, List<MeasureDesc> measureDescList, RowKeyDesc rowKeyDesc,
            AggregationGroup aggGroup, HBaseMappingDesc hBaseMapping, Map<String, String> overrideProperties) {
        CubeDesc desc = new CubeDesc();
        desc.setName(tableName.replace('.', '_'));
        desc.setModelName(tableName.replace('.', '_'));
        desc.setDescription("");
        desc.setLastModified(0L);
        desc.setDimensions(dimensionDescList);
        desc.setMeasures(measureDescList);
        desc.setRowkey(rowKeyDesc);
        desc.setHbaseMapping(hBaseMapping);
        desc.setNotifyList(Lists.<String> newArrayList());
        desc.setStatusNeedNotify(Lists.newArrayList(JobStatusEnum.ERROR.toString()));
        // 86400000 = 1 day, 604800000 = 7 days, 2419200000 = 28 days (ms)
        desc.setAutoMergeTimeRanges(new long[] { 86400000L, 604800000L, 2419200000L });
        desc.setEngineType(IEngineAware.ID_MR_V2);
        desc.setStorageType(storageType);
        desc.setAggregationGroups(Lists.newArrayList(aggGroup));
        desc.getOverrideKylinProps().putAll(overrideProperties);
        // signature must be computed after all fields above are set
        desc.setSignature(desc.calculateSignature());
        desc.updateRandomUuid();
        return desc;
    }

    /**
     * Splits measures into HBase column families: all "normal" measures share one
     * family/column, while each "large" measure (count-distinct or percentile, whose
     * serialized values are big) gets its own family.
     * <p>
     * Families are named F1, F2, ... in creation order; every column uses
     * qualifier "M".
     */
    public static HBaseColumnFamilyDesc[] getHBaseColumnFamily(List<MeasureDesc> measureDescList) {
        List<String> normalMeasureList = Lists.newLinkedList();
        List<String> largeMeasureList = Lists.newLinkedList();
        for (MeasureDesc measureDesc : measureDescList) {
            if (measureDesc.getFunction().isCountDistinct()
                    || measureDesc.getFunction().getExpression().equals(PercentileMeasureType.FUNC_PERCENTILE)) {
                largeMeasureList.add(measureDesc.getName());
            } else {
                normalMeasureList.add(measureDesc.getName());
            }
        }
        List<HBaseColumnFamilyDesc> columnFamilyDescList = Lists.newLinkedList();
        int idx = 1;
        if (normalMeasureList.size() > 0) {
            // one shared column family for all normal measures
            HBaseColumnDesc columnDesc = new HBaseColumnDesc();
            columnDesc.setQualifier("M");
            columnDesc.setMeasureRefs(normalMeasureList.toArray(new String[normalMeasureList.size()]));
            HBaseColumnFamilyDesc columnFamilyDesc = new HBaseColumnFamilyDesc();
            columnFamilyDesc.setName("F" + idx++);
            columnFamilyDesc.setColumns(new HBaseColumnDesc[] { columnDesc });

            columnFamilyDescList.add(columnFamilyDesc);
        }
        for (String largeMeasure : largeMeasureList) {
            // a dedicated column family per large measure
            HBaseColumnDesc columnDesc = new HBaseColumnDesc();
            columnDesc.setQualifier("M");
            columnDesc.setMeasureRefs(new String[] { largeMeasure });
            HBaseColumnFamilyDesc columnFamilyDesc = new HBaseColumnFamilyDesc();
            columnFamilyDesc.setName("F" + idx++);
            columnFamilyDesc.setColumns(new HBaseColumnDesc[] { columnDesc });

            columnFamilyDescList.add(columnFamilyDesc);
        }

        return columnFamilyDescList.toArray(new HBaseColumnFamilyDesc[columnFamilyDescList.size()]);
    }

    /**
     * The time hierarchy (coarse to fine) used by the aggregation groups:
     * year, month, week-begin-date, day-date.
     */
    public static String[] getTimeHierarchy() {
        String[] result = new String[4];
        result[0] = TimePropertyEnum.YEAR.toString();
        result[1] = TimePropertyEnum.MONTH.toString();
        result[2] = TimePropertyEnum.WEEK_BEGIN_DATE.toString();
        result[3] = TimePropertyEnum.DAY_DATE.toString();
        return result;
    }

    /**
     * Prefixes every column with the bare (database-stripped) table name,
     * producing "TABLE.COLUMN" identifiers.
     */
    public static String[] refineColumnWithTable(String tableName, List<String> columns) {
        String[] dimensions = new String[columns.size()];
        for (int i = 0; i < dimensions.length; i++) {
            // tableName is "db.table"; keep only the part after the last '.'
            dimensions[i] = tableName.substring(tableName.lastIndexOf(".") + 1) + "." + columns.get(i);
        }
        return dimensions;
    }

    /** Array overload of {@link #refineColumnWithTable(String, List)}. */
    public static String[] refineColumnWithTable(String tableName, String[] columns) {
        String[] dimensions = new String[columns.length];
        for (int i = 0; i < dimensions.length; i++) {
            dimensions[i] = tableName.substring(tableName.lastIndexOf(".") + 1) + "." + columns[i];
        }
        return dimensions;
    }

    /**
     * Fills the leading row key positions with the standard time columns
     * (day date, week begin date, month, year, hour, minute), ids starting at 1.
     *
     * @return the next free index in {@code rowKeyColDescs} (i.e. 6)
     */
    public static int getTimeRowKeyColDesc(String tableName, RowKeyColDesc[] rowKeyColDescs) {
        int idx = 0;
        rowKeyColDescs[idx] = getRowKeyColDesc(tableName, TimePropertyEnum.DAY_DATE.toString(), idx + 1);
        idx++;
        rowKeyColDescs[idx] = getRowKeyColDesc(tableName, TimePropertyEnum.WEEK_BEGIN_DATE.toString(), idx + 1);
        idx++;
        rowKeyColDescs[idx] = getRowKeyColDesc(tableName, TimePropertyEnum.MONTH.toString(), idx + 1);
        idx++;
        rowKeyColDescs[idx] = getRowKeyColDesc(tableName, TimePropertyEnum.YEAR.toString(), idx + 1);
        idx++;
        rowKeyColDescs[idx] = getRowKeyColDesc(tableName, TimePropertyEnum.TIME_HOUR.toString(), idx + 1);
        idx++;
        rowKeyColDescs[idx] = getRowKeyColDesc(tableName, TimePropertyEnum.TIME_MINUTE.toString(), idx + 1);
        idx++;
        return idx;
    }

    /**
     * Builds a single dictionary-encoded, non-shard-by row key column for
     * "TABLE.COLUMN" at 1-based position {@code id}.
     */
    public static RowKeyColDesc getRowKeyColDesc(String tableName, String column, int id) {
        RowKeyColDesc rowKeyColDesc = new RowKeyColDesc();
        rowKeyColDesc.setIndex(Integer.toString(id));
        rowKeyColDesc.setColumn(tableName.substring(tableName.lastIndexOf(".") + 1) + "." + column);
        rowKeyColDesc.setEncoding(DictionaryDimEnc.ENCODING_NAME);
        rowKeyColDesc.setShardBy(false);
        return rowKeyColDesc;
    }

    /** A plain (non-derived) dimension on the bare table name; name == column. */
    public static DimensionDesc getDimensionDesc(String tableName, String dimension) {
        DimensionDesc dimensionDesc = new DimensionDesc();
        dimensionDesc.setName(dimension);
        dimensionDesc.setTable(tableName.substring(tableName.lastIndexOf(".") + 1));
        dimensionDesc.setColumn(dimension);
        return dimensionDesc;
    }

    /** The standard row-count measure: COUNT(1) named "_COUNT_", returning bigint. */
    public static MeasureDesc getMeasureCount() {
        ParameterDesc parameterDesc = new ParameterDesc();
        parameterDesc.setValue("1");
        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_CONSTANT);

        FunctionDesc function = new FunctionDesc();
        function.setExpression(FunctionDesc.FUNC_COUNT);
        function.setParameter(parameterDesc);
        function.setReturnType(HiveTableCreator.HiveTypeEnum.HBIGINT.toString());

        MeasureDesc result = new MeasureDesc();
        result.setName("_COUNT_");
        result.setFunction(function);
        return result;
    }

    /**
     * SUM measure named "{column}_SUM". If the source type is hive double the
     * return type is widened to decimal; otherwise the source type is kept.
     */
    public static MeasureDesc getMeasureSum(String column, String dataType) {
        ParameterDesc parameterDesc = new ParameterDesc();
        parameterDesc.setValue(column);
        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);

        FunctionDesc function = new FunctionDesc();
        function.setExpression(FunctionDesc.FUNC_SUM);
        function.setParameter(parameterDesc);
        function.setReturnType(dataType.equals(HiveTableCreator.HiveTypeEnum.HDOUBLE.toString())
                ? HiveTableCreator.HiveTypeEnum.HDECIMAL.toString()
                : dataType);

        MeasureDesc result = new MeasureDesc();
        result.setName(column + "_SUM");
        result.setFunction(function);
        return result;
    }

    /** MAX measure named "{column}_MAX", return type equals the source type. */
    public static MeasureDesc getMeasureMax(String column, String dataType) {
        ParameterDesc parameterDesc = new ParameterDesc();
        parameterDesc.setValue(column);
        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);

        FunctionDesc function = new FunctionDesc();
        function.setExpression(FunctionDesc.FUNC_MAX);
        function.setParameter(parameterDesc);
        function.setReturnType(dataType);

        MeasureDesc result = new MeasureDesc();
        result.setName(column + "_MAX");
        result.setFunction(function);
        return result;
    }

    /** MIN measure named "{column}_MIN", return type equals the source type. */
    public static MeasureDesc getMeasureMin(String column, String dataType) {
        ParameterDesc parameterDesc = new ParameterDesc();
        parameterDesc.setValue(column);
        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);

        FunctionDesc function = new FunctionDesc();
        function.setExpression(FunctionDesc.FUNC_MIN);
        function.setParameter(parameterDesc);
        function.setReturnType(dataType);

        MeasureDesc result = new MeasureDesc();
        result.setName(column + "_MIN");
        result.setFunction(function);
        return result;
    }

    /**
     * Approximate count-distinct measure named "{column}_HLL" using a
     * HyperLogLog counter with precision 12 ("hllc12").
     */
    public static MeasureDesc getMeasureHLL(String column) {
        ParameterDesc parameterDesc = new ParameterDesc();
        parameterDesc.setValue(column);
        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);

        FunctionDesc function = new FunctionDesc();
        function.setExpression(FunctionDesc.FUNC_COUNT_DISTINCT);
        function.setParameter(parameterDesc);
        function.setReturnType("hllc12");

        MeasureDesc result = new MeasureDesc();
        result.setName(column + "_HLL");
        result.setFunction(function);
        return result;
    }

    /**
     * Percentile measure named "{column}_PERCENTILE" with return type
     * "percentile(100)".
     */
    public static MeasureDesc getMeasurePercentile(String column) {
        ParameterDesc parameterDesc = new ParameterDesc();
        parameterDesc.setValue(column);
        parameterDesc.setType(FunctionDesc.PARAMETER_TYPE_COLUMN);

        FunctionDesc function = new FunctionDesc();
        function.setExpression(PercentileMeasureType.FUNC_PERCENTILE);
        function.setParameter(parameterDesc);
        function.setReturnType("percentile(100)");

        MeasureDesc result = new MeasureDesc();
        result.setName(column + "_PERCENTILE");
        result.setFunction(function);
        return result;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.tool.metrics.systemcube;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.metadata.model.Segments;
import org.apache.kylin.metadata.realization.RealizationStatusEnum;
import org.apache.kylin.metrics.lib.SinkTool;
import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool;

/**
 * Builds {@link CubeInstance} metadata objects for the Kylin system (metrics)
 * cubes — one per metrics subject (query, query-cube, query-RPC, job,
 * job-exception). The instance name and desc name are both derived from the
 * sink's table name by replacing '.' with '_'.
 */
public class CubeInstanceCreator {

    /**
     * Manual smoke test: serializes the query-metrics cube instance for the
     * current Kylin environment and prints the JSON to stdout.
     */
    public static void main(String[] args) throws Exception {
        //        KylinConfig.setSandboxEnvIfPossible();
        KylinConfig config = KylinConfig.getInstanceFromEnv();

        CubeInstance cubeInstance = generateKylinCubeInstanceForMetricsQuery("ADMIN", config, new HiveSinkTool());
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        DataOutputStream dout = new DataOutputStream(buf);
        CubeManager.CUBE_SERIALIZER.serialize(cubeInstance, dout);
        dout.close();
        buf.close();
        System.out.println(buf.toString());
    }

    /** Cube instance for the query metrics subject. */
    public static CubeInstance generateKylinCubeInstanceForMetricsQuery(String owner, KylinConfig config,
            SinkTool sinkTool) {
        return generateKylinCubeInstance(owner, sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQuery()));
    }

    /** Cube instance for the per-cube query metrics subject. */
    public static CubeInstance generateKylinCubeInstanceForMetricsQueryCube(String owner, KylinConfig config,
            SinkTool sinkTool) {
        return generateKylinCubeInstance(owner,
                sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryCube()));
    }

    /** Cube instance for the query RPC-call metrics subject. */
    public static CubeInstance generateKylinCubeInstanceForMetricsQueryRPC(String owner, KylinConfig config,
            SinkTool sinkTool) {
        return generateKylinCubeInstance(owner,
                sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectQueryRpcCall()));
    }

    /** Cube instance for the job metrics subject. */
    public static CubeInstance generateKylinCubeInstanceForMetricsJob(String owner, KylinConfig config,
            SinkTool sinkTool) {
        return generateKylinCubeInstance(owner, sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJob()));
    }

    /** Cube instance for the job-exception metrics subject. */
    public static CubeInstance generateKylinCubeInstanceForMetricsJobException(String owner, KylinConfig config,
            SinkTool sinkTool) {
        return generateKylinCubeInstance(owner,
                sinkTool.getTableNameForMetrics(config.getKylinMetricsSubjectJobException()));
    }

    /**
     * Creates a DISABLED cube instance with no segments, a fixed create time
     * of 0 and a fresh random UUID. Name and desc name are the table name with
     * '.' replaced by '_'.
     */
    public static CubeInstance generateKylinCubeInstance(String owner, String tableName) {
        CubeInstance cubeInstance = new CubeInstance();
        cubeInstance.setName(tableName.replace('.', '_'));
        cubeInstance.setSegments(new Segments<CubeSegment>());
        cubeInstance.setDescName(tableName.replace('.', '_'));
        cubeInstance.setStatus(RealizationStatusEnum.DISABLED);
        cubeInstance.setOwner(owner);
        cubeInstance.setCreateTimeUTC(0L);
        cubeInstance.updateRandomUuid();

        return cubeInstance;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.tool.metrics.systemcube;

import java.util.List;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.metrics.lib.ActiveReservoirReporter;
import org.apache.kylin.metrics.lib.impl.RecordEvent;
import org.apache.kylin.metrics.lib.impl.TimePropertyEnum;
import org.apache.kylin.metrics.lib.impl.hive.HiveProducerRecord;
import org.apache.kylin.metrics.lib.impl.hive.HiveReservoirReporter;
import org.apache.kylin.metrics.property.JobPropertyEnum;
import org.apache.kylin.metrics.property.QueryCubePropertyEnum;
import org.apache.kylin.metrics.property.QueryPropertyEnum;
import org.apache.kylin.metrics.property.QueryRPCPropertyEnum;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;

/**
 * Generates the Hive DDL (database + one table per metrics subject) backing
 * the Kylin system cubes. Each table is a delimited TEXTFILE partitioned by
 * day date; column lists mirror the fields emitted by the metrics reporters.
 */
public class HiveTableCreator {

    /** Manual smoke test: prints the full DDL for the current environment. */
    public static void main(String[] args) {
        //        KylinConfig.setSandboxEnvIfPossible();
        KylinConfig config = KylinConfig.getInstanceFromEnv();

        System.out.println(generateAllSQL(config));
    }

    /** Concatenates the database DDL and the DDL for all five metrics tables. */
    public static String generateAllSQL(KylinConfig config) {
        StringBuilder sb = new StringBuilder();
        sb.append(generateDatabaseSQL());
        sb.append("\n");
        sb.append(generateHiveTableSQLForMetricsQuery(config));
        sb.append("\n");
        sb.append(generateHiveTableSQLForMetricsQueryCUBE(config));
        sb.append("\n");
        sb.append(generateHiveTableSQLForMetricsQueryRPC(config));
        sb.append("\n");
        sb.append(generateHiveTableSQLForMetricsJob(config));
        sb.append("\n");
        sb.append(generateHiveTableSQLForMetricsJobException(config));

        return sb.toString();
    }

    /** CREATE DATABASE IF NOT EXISTS for the shared KYLIN metrics database. */
    public static String generateDatabaseSQL() {
        return "CREATE DATABASE IF NOT EXISTS " + ActiveReservoirReporter.KYLIN_PREFIX + ";\n";
    }

    /**
     * Emits DROP TABLE IF EXISTS + CREATE TABLE DDL for one metrics table:
     * delimited TEXTFILE, fields separated by the HiveProducerRecord delimiter,
     * optionally PARTITIONED BY the given key/type pairs.
     *
     * @param tableName    fully qualified table name
     * @param columns      (name, hive type) pairs for the regular columns
     * @param partitionKVs (name, hive type) pairs for partition columns; may be
     *                     null or empty for an unpartitioned table
     */
    public static String generateHiveTableSQL(String tableName, List<Pair<String, String>> columns,
            List<Pair<String, String>> partitionKVs) {
        StringBuilder sb = new StringBuilder();
        sb.append("DROP TABLE IF EXISTS " + tableName + ";\n");
        sb.append("\n");
        sb.append("CREATE TABLE " + tableName + "\n");
        sb.append("(\n");
        for (int i = 0; i < columns.size(); i++) {
            if (i > 0) {
                sb.append(",");
            }
            Pair<String, String> column = columns.get(i);
            sb.append(column.getFirst() + " " + column.getSecond() + "\n");
        }
        sb.append(")\n");
        if (partitionKVs != null && partitionKVs.size() > 0) {
            sb.append("PARTITIONED BY(");
            for (int i = 0; i < partitionKVs.size(); i++) {
                if (i > 0) {
                    sb.append(",");
                }
                Pair<String, String> partitionKV = partitionKVs.get(i);
                sb.append(partitionKV.getFirst() + " " + partitionKV.getSecond());
            }
            sb.append(")\n");
        }
        sb.append("ROW FORMAT DELIMITED FIELDS TERMINATED BY '" + HiveProducerRecord.DELIMITER + "'\n");
        sb.append("STORED AS TEXTFILE;\n");
        return sb.toString();
    }

    /** DDL for the query metrics table. */
    public static String generateHiveTableSQLForMetricsQuery(KylinConfig config) {
        String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectQuery());
        return generateHiveTableSQL(tableName, getHiveColumnsForMetricsQuery(), getPartitionKVsForHiveTable());
    }

    /** DDL for the per-cube query metrics table. */
    public static String generateHiveTableSQLForMetricsQueryCUBE(KylinConfig config) {
        String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectQueryCube());
        return generateHiveTableSQL(tableName, getHiveColumnsForMetricsQueryCube(), getPartitionKVsForHiveTable());
    }

    /** DDL for the query RPC-call metrics table. */
    public static String generateHiveTableSQLForMetricsQueryRPC(KylinConfig config) {
        String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectQueryRpcCall());
        return generateHiveTableSQL(tableName, getHiveColumnsForMetricsQueryRPC(), getPartitionKVsForHiveTable());
    }

    /** DDL for the job metrics table. */
    public static String generateHiveTableSQLForMetricsJob(KylinConfig config) {
        String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectJob());
        return generateHiveTableSQL(tableName, getHiveColumnsForMetricsJob(), getPartitionKVsForHiveTable());
    }

    /** DDL for the job-exception metrics table. */
    public static String generateHiveTableSQLForMetricsJobException(KylinConfig config) {
        String tableName = HiveReservoirReporter.getTableFromSubject(config.getKylinMetricsSubjectJobException());
        return generateHiveTableSQL(tableName, getHiveColumnsForMetricsJobException(), getPartitionKVsForHiveTable());
    }

    /** Column list (name, hive type) for the query metrics table. */
    public static List<Pair<String, String>> getHiveColumnsForMetricsQuery() {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.add(new Pair<>(QueryPropertyEnum.ID_CODE.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.USER.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.REALIZATION.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.REALIZATION_TYPE.toString(), HiveTypeEnum.HINT.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.TYPE.toString(), HiveTypeEnum.HSTRING.toString()));

        columns.add(new Pair<>(QueryPropertyEnum.EXCEPTION.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.TIME_COST.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.CALCITE_RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.STORAGE_RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryPropertyEnum.AGGR_FILTER_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));

        columns.addAll(getTimeColumnsForMetrics());
        return columns;
    }

    /** Column list (name, hive type) for the per-cube query metrics table. */
    public static List<Pair<String, String>> getHiveColumnsForMetricsQueryCube() {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.CUBE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.SEGMENT.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.CUBOID_SOURCE.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.CUBOID_TARGET.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.IF_MATCH.toString(), HiveTypeEnum.HBOOLEAN.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.FILTER_MASK.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.IF_SUCCESS.toString(), HiveTypeEnum.HBOOLEAN.toString()));

        columns.add(new Pair<>(QueryCubePropertyEnum.WEIGHT_PER_HIT.toString(), HiveTypeEnum.HDOUBLE.toString()));

        columns.add(new Pair<>(QueryCubePropertyEnum.CALL_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.TIME_SUM.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.TIME_MAX.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.SKIP_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.SCAN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.AGGR_FILTER_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryCubePropertyEnum.AGGR_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));

        columns.addAll(getTimeColumnsForMetrics());
        return columns;
    }

    /** Column list (name, hive type) for the query RPC-call metrics table. */
    public static List<Pair<String, String>> getHiveColumnsForMetricsQueryRPC() {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.REALIZATION.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.RPC_SERVER.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.EXCEPTION.toString(), HiveTypeEnum.HSTRING.toString()));

        columns.add(new Pair<>(QueryRPCPropertyEnum.CALL_TIME.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.RETURN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.SCAN_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.SKIP_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.AGGR_FILTER_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(QueryRPCPropertyEnum.AGGR_COUNT.toString(), HiveTypeEnum.HBIGINT.toString()));

        columns.addAll(getTimeColumnsForMetrics());
        return columns;
    }

    /** Column list (name, hive type) for the job metrics table. */
    public static List<Pair<String, String>> getHiveColumnsForMetricsJob() {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.add(new Pair<>(JobPropertyEnum.ID_CODE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.USER.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.CUBE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.TYPE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.ALGORITHM.toString(), HiveTypeEnum.HSTRING.toString()));

        columns.add(new Pair<>(JobPropertyEnum.BUILD_DURATION.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(JobPropertyEnum.SOURCE_SIZE.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(JobPropertyEnum.CUBE_SIZE.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(JobPropertyEnum.PER_BYTES_TIME_COST.toString(), HiveTypeEnum.HDOUBLE.toString()));
        columns.add(new Pair<>(JobPropertyEnum.WAIT_RESOURCE_TIME.toString(), HiveTypeEnum.HBIGINT.toString()));

        columns.add(
                new Pair<>(JobPropertyEnum.STEP_DURATION_DISTINCT_COLUMNS.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(JobPropertyEnum.STEP_DURATION_DICTIONARY.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(JobPropertyEnum.STEP_DURATION_INMEM_CUBING.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(
                new Pair<>(JobPropertyEnum.STEP_DURATION_HFILE_CONVERT.toString(), HiveTypeEnum.HBIGINT.toString()));

        columns.addAll(getTimeColumnsForMetrics());
        return columns;
    }

    /** Column list (name, hive type) for the job-exception metrics table. */
    public static List<Pair<String, String>> getHiveColumnsForMetricsJobException() {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.add(new Pair<>(JobPropertyEnum.ID_CODE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.HOST.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.USER.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.PROJECT.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.CUBE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.TYPE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(JobPropertyEnum.ALGORITHM.toString(), HiveTypeEnum.HSTRING.toString()));

        columns.add(new Pair<>(JobPropertyEnum.EXCEPTION.toString(), HiveTypeEnum.HSTRING.toString()));

        columns.addAll(getTimeColumnsForMetrics());
        return columns;
    }

    /** Single partition key shared by all metrics tables: day date (string). */
    public static List<Pair<String, String>> getPartitionKVsForHiveTable() {
        List<Pair<String, String>> partitionKVs = Lists.newLinkedList();
        partitionKVs.add(new Pair<>(TimePropertyEnum.DAY_DATE.toString(), HiveTypeEnum.HSTRING.toString()));
        return partitionKVs;
    }

    /**
     * Time columns appended to every metrics table. Note DAY_DATE is absent
     * here because it is the partition column (see
     * {@link #getPartitionKVsForHiveTable()}); DAY_TIME is included instead.
     */
    public static List<Pair<String, String>> getTimeColumnsForMetrics() {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.add(new Pair<>(RecordEvent.RecordReserveKeyEnum.TIME.toString(), HiveTypeEnum.HBIGINT.toString()));
        columns.add(new Pair<>(TimePropertyEnum.YEAR.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(TimePropertyEnum.MONTH.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(TimePropertyEnum.WEEK_BEGIN_DATE.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(TimePropertyEnum.DAY_TIME.toString(), HiveTypeEnum.HSTRING.toString()));
        columns.add(new Pair<>(TimePropertyEnum.TIME_HOUR.toString(), HiveTypeEnum.HINT.toString()));
        columns.add(new Pair<>(TimePropertyEnum.TIME_MINUTE.toString(), HiveTypeEnum.HINT.toString()));
        columns.add(new Pair<>(TimePropertyEnum.TIME_SECOND.toString(), HiveTypeEnum.HINT.toString()));

        return columns;
    }

    /**
     * Hive primitive types used by the metrics tables; {@code toString()}
     * yields the lowercase hive type name (e.g. "bigint").
     */
    enum HiveTypeEnum {
        HBOOLEAN("boolean"), HINT("int"), HBIGINT("bigint"), HDOUBLE("double"), HSTRING("string"), HDECIMAL("decimal");

        private final String typeName;

        HiveTypeEnum(String typeName) {
            this.typeName = typeName;
        }

        /**
         * Case-insensitive lookup by hive type name; returns null for
         * null/empty input or an unknown type name.
         */
        public static HiveTypeEnum getByTypeName(String typeName) {
            if (Strings.isNullOrEmpty(typeName)) {
                return null;
            }
            for (HiveTypeEnum hiveType : HiveTypeEnum.values()) {
                if (hiveType.typeName.equals(typeName.toLowerCase())) {
                    return hiveType;
                }
            }
            return null;
        }

        public String toString() {
            return typeName;
        }
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.tool.metrics.systemcube;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.util.List;
import java.util.UUID;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.metadata.TableMetadataManager;
import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metrics.MetricsManager;
import org.apache.kylin.metrics.lib.ActiveReservoirReporter;
import org.apache.kylin.metrics.lib.SinkTool;
import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool;

import com.google.common.collect.Lists;

/**
 * Builds Kylin {@link TableDesc} metadata for the metrics tables. Column lists
 * are reused from {@link HiveTableCreator} (regular columns + the day-date
 * partition column appended at the end).
 */
public class KylinTableCreator {

    /**
     * Manual smoke test: serializes the query-metrics table desc for the
     * current Kylin environment and prints the JSON to stdout.
     */
    public static void main(String[] args) throws Exception {
        //        KylinConfig.setSandboxEnvIfPossible();
        KylinConfig config = KylinConfig.getInstanceFromEnv();

        TableDesc kylinTable = generateKylinTableForMetricsQuery(config, new HiveSinkTool());
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        DataOutputStream dout = new DataOutputStream(buf);
        TableMetadataManager.TABLE_SERIALIZER.serialize(kylinTable, dout);
        dout.close();
        buf.close();
        System.out.println(buf.toString());
    }

    /** Table desc for the query metrics subject. */
    public static TableDesc generateKylinTableForMetricsQuery(KylinConfig kylinConfig, SinkTool sinkTool) {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.addAll(HiveTableCreator.getHiveColumnsForMetricsQuery());
        columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable());
        return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectQuery(), columns);
    }

    /** Table desc for the per-cube query metrics subject. */
    public static TableDesc generateKylinTableForMetricsQueryCube(KylinConfig kylinConfig, SinkTool sinkTool) {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.addAll(HiveTableCreator.getHiveColumnsForMetricsQueryCube());
        columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable());
        return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectQueryCube(), columns);
    }

    /** Table desc for the query RPC-call metrics subject. */
    public static TableDesc generateKylinTableForMetricsQueryRPC(KylinConfig kylinConfig, SinkTool sinkTool) {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.addAll(HiveTableCreator.getHiveColumnsForMetricsQueryRPC());
        columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable());
        return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectQueryRpcCall(), columns);
    }

    /** Table desc for the job metrics subject. */
    public static TableDesc generateKylinTableForMetricsJob(KylinConfig kylinConfig, SinkTool sinkTool) {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.addAll(HiveTableCreator.getHiveColumnsForMetricsJob());
        columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable());
        return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectJob(), columns);
    }

    /** Table desc for the job-exception metrics subject. */
    public static TableDesc generateKylinTableForMetricsJobException(KylinConfig kylinConfig, SinkTool sinkTool) {
        List<Pair<String, String>> columns = Lists.newLinkedList();
        columns.addAll(HiveTableCreator.getHiveColumnsForMetricsJobException());
        columns.addAll(HiveTableCreator.getPartitionKVsForHiveTable());
        return generateKylinTable(sinkTool, kylinConfig.getKylinMetricsSubjectJobException(), columns);
    }

    /**
     * Assembles the TableDesc: database/table split from the sink's table
     * name, random UUID, null table type, fixed lastModified of 0, the sink's
     * source type, and 1-based column ids in list order. The desc is
     * initialized against the metrics system project.
     */
    public static TableDesc generateKylinTable(SinkTool sinkTool, String subject, List<Pair<String, String>> columns) {
        TableDesc kylinTable = new TableDesc();

        Pair<String, String> tableNameSplits = ActiveReservoirReporter
                .getTableNameSplits(sinkTool.getTableNameForMetrics(subject));
        kylinTable.setUuid(UUID.randomUUID().toString());
        kylinTable.setDatabase(tableNameSplits.getFirst());
        kylinTable.setName(tableNameSplits.getSecond());
        kylinTable.setTableType(null);
        kylinTable.setLastModified(0L);
        kylinTable.setSourceType(sinkTool.getSourceType());

        ColumnDesc[] columnDescs = new ColumnDesc[columns.size()];
        for (int i = 0; i < columns.size(); i++) {
            columnDescs[i] = new ColumnDesc();
            Pair<String, String> entry = columns.get(i);
            columnDescs[i].setId(Integer.toString(i + 1));
            columnDescs[i].setName(entry.getFirst());
            columnDescs[i].setDatatype(entry.getSecond());
        }
        kylinTable.setColumns(columnDescs);

        kylinTable.init(MetricsManager.SYSTEM_PROJECT);

        return kylinTable;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.kylin.tool.metrics.systemcube;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.util.List;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.JsonSerializer;
import org.apache.kylin.common.persistence.Serializer;
import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.JoinTableDesc;
import org.apache.kylin.metadata.model.ModelDimensionDesc;
import org.apache.kylin.metadata.model.PartitionDesc;
import org.apache.kylin.metrics.lib.SinkTool;
import org.apache.kylin.metrics.lib.impl.RecordEvent;
import org.apache.kylin.metrics.lib.impl.TimePropertyEnum;
import org.apache.kylin.metrics.property.JobPropertyEnum;
import org.apache.kylin.metrics.property.QueryCubePropertyEnum;
import org.apache.kylin.metrics.property.QueryPropertyEnum;
import org.apache.kylin.metrics.property.QueryRPCPropertyEnum;
import org.apache.kylin.tool.metrics.systemcube.util.HiveSinkTool;

import com.google.common.collect.Lists;

/**
 * Generates Kylin {@link DataModelDesc} metadata for the system cubes that collect
 * query &amp; job metrics. Each metrics subject (query, query-cube, query-RPC, job,
 * job-exception) gets a flat single-table model: its dimension and measure column
 * lists come from the corresponding getDimensions*/getMeasures* methods below, and
 * every model is day-partitioned on the shared time columns.
 */
public class ModelCreator {

    public static final Serializer<DataModelDesc> MODELDESC_SERIALIZER = new JsonSerializer<>(DataModelDesc.class);

    /**
     * Prints the serialized query-metrics model to stdout for inspection.
     */
    public static void main(String[] args) throws Exception {
        // KylinConfig.setSandboxEnvIfPossible();
        KylinConfig config = KylinConfig.getInstanceFromEnv();

        DataModelDesc kylinModel = generateKylinModelForMetricsQuery("ADMIN", config, new HiveSinkTool());
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        // try-with-resources guarantees the stream is closed (and flushed) even if
        // serialization throws; closing a ByteArrayOutputStream itself is a no-op.
        try (DataOutputStream dout = new DataOutputStream(buf)) {
            MODELDESC_SERIALIZER.serialize(kylinModel, dout);
        }
        // Decode explicitly as UTF-8 rather than the platform default charset.
        System.out.println(buf.toString("UTF-8"));
    }

    /**
     * Builds the partition spec used by every metrics model: day-date as the date
     * partition column and day-time as the time partition column, both qualified
     * with the owning table's name.
     */
    public static PartitionDesc getPartitionDesc(String tableName) {
        PartitionDesc partitionDesc = new PartitionDesc();

        partitionDesc.setPartitionDateColumn(tableName + "." + TimePropertyEnum.DAY_DATE.toString());
        partitionDesc.setPartitionTimeColumn(tableName + "." + TimePropertyEnum.DAY_TIME.toString());
        return partitionDesc;
    }

    /** Builds the model for the per-query metrics subject. */
    public static DataModelDesc generateKylinModelForMetricsQuery(String owner, KylinConfig kylinConfig,
            SinkTool sinkTool) {
        String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectQuery());
        return generateKylinModel(owner, tableName, getDimensionsForMetricsQuery(), getMeasuresForMetricsQuery(),
                getPartitionDesc(tableName));
    }

    /** Builds the model for the per-cube query metrics subject. */
    public static DataModelDesc generateKylinModelForMetricsQueryCube(String owner, KylinConfig kylinConfig,
            SinkTool sinkTool) {
        String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectQueryCube());
        return generateKylinModel(owner, tableName, getDimensionsForMetricsQueryCube(),
                getMeasuresForMetricsQueryCube(), getPartitionDesc(tableName));
    }

    /** Builds the model for the query RPC-call metrics subject. */
    public static DataModelDesc generateKylinModelForMetricsQueryRPC(String owner, KylinConfig kylinConfig,
            SinkTool sinkTool) {
        String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectQueryRpcCall());
        return generateKylinModel(owner, tableName, getDimensionsForMetricsQueryRPC(), getMeasuresForMetricsQueryRPC(),
                getPartitionDesc(tableName));
    }

    /** Builds the model for the job metrics subject. */
    public static DataModelDesc generateKylinModelForMetricsJob(String owner, KylinConfig kylinConfig,
            SinkTool sinkTool) {
        String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectJob());
        return generateKylinModel(owner, tableName, getDimensionsForMetricsJob(), getMeasuresForMetricsJob(),
                getPartitionDesc(tableName));
    }

    /** Builds the model for the job-exception metrics subject. */
    public static DataModelDesc generateKylinModelForMetricsJobException(String owner, KylinConfig kylinConfig,
            SinkTool sinkTool) {
        String tableName = sinkTool.getTableNameForMetrics(kylinConfig.getKylinMetricsSubjectJobException());
        return generateKylinModel(owner, tableName, getDimensionsForMetricsJobException(),
                getMeasuresForMetricsJobException(), getPartitionDesc(tableName));
    }

    /** Dimension columns for the per-query metrics model. */
    public static List<String> getDimensionsForMetricsQuery() {
        List<String> result = Lists.newLinkedList();
        result.add(RecordEvent.RecordReserveKeyEnum.HOST.toString());
        result.add(QueryPropertyEnum.USER.toString());
        result.add(QueryPropertyEnum.PROJECT.toString());
        result.add(QueryPropertyEnum.REALIZATION.toString());
        result.add(QueryPropertyEnum.REALIZATION_TYPE.toString());
        result.add(QueryPropertyEnum.TYPE.toString());
        result.add(QueryPropertyEnum.EXCEPTION.toString());

        result.addAll(getTimeDimensionsForMetrics());
        return result;
    }

    /** Measure columns for the per-query metrics model. */
    public static List<String> getMeasuresForMetricsQuery() {
        List<String> result = Lists.newLinkedList();
        result.add(QueryPropertyEnum.ID_CODE.toString());
        result.add(QueryPropertyEnum.TIME_COST.toString());
        result.add(QueryPropertyEnum.CALCITE_RETURN_COUNT.toString());
        result.add(QueryPropertyEnum.STORAGE_RETURN_COUNT.toString());
        result.add(QueryPropertyEnum.AGGR_FILTER_COUNT.toString());

        return result;
    }

    /** Dimension columns for the per-cube query metrics model. */
    public static List<String> getDimensionsForMetricsQueryCube() {
        List<String> result = Lists.newLinkedList();
        result.add(RecordEvent.RecordReserveKeyEnum.HOST.toString());
        result.add(QueryCubePropertyEnum.PROJECT.toString());
        result.add(QueryCubePropertyEnum.CUBE.toString());
        result.add(QueryCubePropertyEnum.SEGMENT.toString());
        result.add(QueryCubePropertyEnum.CUBOID_SOURCE.toString());
        result.add(QueryCubePropertyEnum.CUBOID_TARGET.toString());
        result.add(QueryCubePropertyEnum.FILTER_MASK.toString());
        result.add(QueryCubePropertyEnum.IF_MATCH.toString());
        result.add(QueryCubePropertyEnum.IF_SUCCESS.toString());

        result.addAll(getTimeDimensionsForMetrics());
        return result;
    }

    /** Measure columns for the per-cube query metrics model. */
    public static List<String> getMeasuresForMetricsQueryCube() {
        List<String> result = Lists.newLinkedList();
        result.add(QueryCubePropertyEnum.WEIGHT_PER_HIT.toString());
        result.add(QueryCubePropertyEnum.CALL_COUNT.toString());
        result.add(QueryCubePropertyEnum.TIME_SUM.toString());
        result.add(QueryCubePropertyEnum.TIME_MAX.toString());
        result.add(QueryCubePropertyEnum.SKIP_COUNT.toString());
        result.add(QueryCubePropertyEnum.SCAN_COUNT.toString());
        result.add(QueryCubePropertyEnum.RETURN_COUNT.toString());
        result.add(QueryCubePropertyEnum.AGGR_FILTER_COUNT.toString());
        result.add(QueryCubePropertyEnum.AGGR_COUNT.toString());

        return result;
    }

    /** Dimension columns for the query RPC-call metrics model. */
    public static List<String> getDimensionsForMetricsQueryRPC() {
        List<String> result = Lists.newLinkedList();
        result.add(RecordEvent.RecordReserveKeyEnum.HOST.toString());
        result.add(QueryRPCPropertyEnum.PROJECT.toString());
        result.add(QueryRPCPropertyEnum.REALIZATION.toString());
        result.add(QueryRPCPropertyEnum.RPC_SERVER.toString());
        result.add(QueryRPCPropertyEnum.EXCEPTION.toString());

        result.addAll(getTimeDimensionsForMetrics());
        return result;
    }

    /** Measure columns for the query RPC-call metrics model. */
    public static List<String> getMeasuresForMetricsQueryRPC() {
        List<String> result = Lists.newLinkedList();
        result.add(QueryRPCPropertyEnum.CALL_TIME.toString());
        result.add(QueryRPCPropertyEnum.RETURN_COUNT.toString());
        result.add(QueryRPCPropertyEnum.SCAN_COUNT.toString());
        result.add(QueryRPCPropertyEnum.SKIP_COUNT.toString());
        result.add(QueryRPCPropertyEnum.AGGR_FILTER_COUNT.toString());
        result.add(QueryRPCPropertyEnum.AGGR_COUNT.toString());

        return result;
    }

    /** Dimension columns for the job metrics model. */
    public static List<String> getDimensionsForMetricsJob() {
        List<String> result = Lists.newLinkedList();
        result.add(JobPropertyEnum.USER.toString());
        result.add(JobPropertyEnum.PROJECT.toString());
        result.add(JobPropertyEnum.CUBE.toString());
        result.add(JobPropertyEnum.TYPE.toString());
        result.add(JobPropertyEnum.ALGORITHM.toString());

        result.addAll(getTimeDimensionsForMetrics());
        return result;
    }

    /** Measure columns for the job metrics model. */
    public static List<String> getMeasuresForMetricsJob() {
        List<String> result = Lists.newLinkedList();
        result.add(JobPropertyEnum.BUILD_DURATION.toString());
        result.add(JobPropertyEnum.SOURCE_SIZE.toString());
        result.add(JobPropertyEnum.CUBE_SIZE.toString());
        result.add(JobPropertyEnum.PER_BYTES_TIME_COST.toString());
        result.add(JobPropertyEnum.WAIT_RESOURCE_TIME.toString());

        result.add(JobPropertyEnum.STEP_DURATION_DISTINCT_COLUMNS.toString());
        result.add(JobPropertyEnum.STEP_DURATION_DICTIONARY.toString());
        result.add(JobPropertyEnum.STEP_DURATION_INMEM_CUBING.toString());
        result.add(JobPropertyEnum.STEP_DURATION_HFILE_CONVERT.toString());

        return result;
    }

    /** Dimension columns for the job-exception metrics model. */
    public static List<String> getDimensionsForMetricsJobException() {
        List<String> result = Lists.newLinkedList();
        result.add(JobPropertyEnum.USER.toString());
        result.add(JobPropertyEnum.PROJECT.toString());
        result.add(JobPropertyEnum.CUBE.toString());
        result.add(JobPropertyEnum.TYPE.toString());
        result.add(JobPropertyEnum.ALGORITHM.toString());
        result.add(JobPropertyEnum.EXCEPTION.toString());

        result.addAll(getTimeDimensionsForMetrics());
        return result;
    }

    /** Measure columns for the job-exception metrics model. */
    public static List<String> getMeasuresForMetricsJobException() {
        List<String> result = Lists.newLinkedList();
        result.add(JobPropertyEnum.ID_CODE.toString());

        return result;
    }

    /** Time dimension columns shared by every metrics model. */
    public static List<String> getTimeDimensionsForMetrics() {
        List<String> result = Lists.newLinkedList();
        result.add(RecordEvent.RecordReserveKeyEnum.TIME.toString());
        result.add(TimePropertyEnum.YEAR.toString());
        result.add(TimePropertyEnum.MONTH.toString());
        result.add(TimePropertyEnum.WEEK_BEGIN_DATE.toString());
        result.add(TimePropertyEnum.DAY_TIME.toString());
        result.add(TimePropertyEnum.DAY_DATE.toString());
        result.add(TimePropertyEnum.TIME_HOUR.toString());
        result.add(TimePropertyEnum.TIME_MINUTE.toString());

        return result;
    }

    /**
     * Assembles a flat single-table {@link DataModelDesc}.
     *
     * @param owner         owner recorded on the model (e.g. "ADMIN")
     * @param tableName     fully-qualified fact table name ("db.table"); also used,
     *                      with '.' replaced by '_', as the model name
     * @param dimensions    dimension column names, all on the fact table
     * @param measures      measure column names
     * @param partitionDesc partition spec, typically from {@link #getPartitionDesc(String)}
     * @return a model description with no join tables and a fresh random uuid
     */
    public static DataModelDesc generateKylinModel(String owner, String tableName, List<String> dimensions,
            List<String> measures, PartitionDesc partitionDesc) {
        ModelDimensionDesc modelDimensionDesc = new ModelDimensionDesc();
        modelDimensionDesc.setTable(tableName);
        modelDimensionDesc.setColumns(dimensions.toArray(new String[dimensions.size()]));

        DataModelDesc kylinModel = new DataModelDesc();
        // Model names must not contain '.', so derive the name from the table name.
        kylinModel.setName(tableName.replace('.', '_'));
        kylinModel.setOwner(owner);
        kylinModel.setDescription("");
        kylinModel.setLastModified(0L);
        kylinModel.setRootFactTableName(tableName);
        kylinModel.setJoinTables(new JoinTableDesc[0]);
        kylinModel.setDimensions(Lists.newArrayList(modelDimensionDesc));
        kylinModel.setMetrics(measures.toArray(new String[measures.size()]));
        kylinModel.setFilterCondition("");
        kylinModel.setPartitionDesc(partitionDesc);
        kylinModel.setCapacity(DataModelDesc.RealizationCapacity.SMALL);
        kylinModel.updateRandomUuid();

        return kylinModel;
    }
}