This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push: new 0d9fbbe KYLIN-3416 Group by with expression can not aggregate exactly 0d9fbbe is described below commit 0d9fbbee85c6a5415639cd59ba5ddacbd5056537 Author: chao long <wayn...@qq.com> AuthorDate: Tue Dec 11 11:12:01 2018 +0800 KYLIN-3416 Group by with expression can not aggregate exactly --- .../java/org/apache/kylin/metadata/realization/SQLDigest.java | 4 +++- .../apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java | 10 ++++++++-- .../java/org/apache/kylin/storage/hbase/ITStorageTest.java | 1 + .../java/org/apache/kylin/query/relnode/OLAPAggregateRel.java | 7 +++++++ .../main/java/org/apache/kylin/query/relnode/OLAPContext.java | 3 ++- 5 files changed, 21 insertions(+), 4 deletions(-) diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java index 0b23e48..fa7d1e5 100644 --- a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java +++ b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java @@ -60,6 +60,7 @@ public class SQLDigest { public Set<TblColRef> subqueryJoinParticipants; public Map<TblColRef, TupleExpression> dynGroupbyColumns; + public boolean groupByExpression; // aggregation public Set<TblColRef> metricColumns; @@ -85,7 +86,7 @@ public class SQLDigest { public SQLDigest(String factTable, Set<TblColRef> allColumns, List<JoinDesc> joinDescs, // model List<TblColRef> groupbyColumns, Set<TblColRef> subqueryJoinParticipants, - Map<TblColRef, TupleExpression> dynGroupByColumns, // group by + Map<TblColRef, TupleExpression> dynGroupByColumns, boolean groupByExpression, // group by Set<TblColRef> metricColumns, List<FunctionDesc> aggregations, List<SQLCall> aggrSqlCalls, // aggregation List<DynamicFunctionDesc> dynAggregations, // Set<TblColRef> rtDimensionColumns, Set<TblColRef> rtMetricColumns, // dynamic col related columns @@ -101,6 +102,7 @@ public class SQLDigest { this.subqueryJoinParticipants = subqueryJoinParticipants; this.dynGroupbyColumns = dynGroupByColumns; + this.groupByExpression = groupByExpression; this.metricColumns = metricColumns; this.aggregations = aggregations; diff --git a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java index 68c4113..8d82873 100644 --- a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java +++ b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java @@ -163,7 +163,7 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery { // exactAggregation mean: needn't aggregation at storage and query engine both. boolean exactAggregation = isExactAggregation(context, cuboid, groups, otherDimsD, singleValuesD, - derivedPostAggregation, sqlDigest.aggregations, sqlDigest.aggrSqlCalls); + derivedPostAggregation, sqlDigest.aggregations, sqlDigest.aggrSqlCalls, sqlDigest.groupByExpression); context.setExactAggregation(exactAggregation); // replace derived columns in filter with host columns; columns on loosened condition must be added to group by @@ -557,7 +557,7 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery { private boolean isExactAggregation(StorageContext context, Cuboid cuboid, Collection<TblColRef> groups, Set<TblColRef> othersD, Set<TblColRef> singleValuesD, Set<TblColRef> derivedPostAggregation, - Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls) { + Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls, boolean groupByExpression) { if (context.isNeedStorageAggregation()) { logger.info("exactAggregation is false because need storage aggregation"); return false; @@ -605,6 +605,12 @@ public abstract class GTCubeStorageQueryBase implements IStorageQuery { } } + // for group by expression like: group by seller_id/100. seller_id_1(200) get 2, seller_id_2(201) also get 2, so can't aggregate exactly + if (groupByExpression) { + logger.info("exactAggregation is false because group by expression"); + return false; + } + logger.info("exactAggregation is true, cuboid id is {0}", String.valueOf(cuboid.getId())); return true; } diff --git a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java index 61aa560..3f8dccc 100644 --- a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java +++ b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java @@ -142,6 +142,7 @@ public class ITStorageTest extends HBaseMetadataTestCase { SQLDigest sqlDigest = new SQLDigest("default.test_kylin_fact", /*allCol*/ Collections.<TblColRef> emptySet(), /*join*/ null, // groups, /*subqueryJoinParticipants*/ Sets.<TblColRef> newHashSet(), // /*dynamicGroupByColumns*/ Collections.<TblColRef, TupleExpression> emptyMap(), // + /*groupByExpression*/ false, // /*metricCol*/ Collections.<TblColRef> emptySet(), aggregations, /*aggrSqlCalls*/ Collections.<SQLCall> emptyList(), // /*dynamicAggregations*/ Collections.<DynamicFunctionDesc> emptyList(), // /*runtimeDimensionColumns*/ Collections.<TblColRef> emptySet(), // diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java index 5ea0437..fabd854 100644 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java @@ -85,6 +85,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import static org.apache.kylin.metadata.expression.TupleExpression.ExpressionOperatorEnum.COLUMN; /** */ public class OLAPAggregateRel extends Aggregate implements OLAPRel { @@ -267,6 +268,12 @@ public class OLAPAggregateRel extends Aggregate implements OLAPRel { this.groups = Lists.newArrayList(); for (int i = getGroupSet().nextSetBit(0); i >= 0; i = getGroupSet().nextSetBit(i + 1)) { TupleExpression tupleExpression = inputColumnRowType.getSourceColumnsByIndex(i); + + // group by column with operator + if (this.context.groupByExpression == false && !(COLUMN.equals(tupleExpression.getOperator()) && tupleExpression.getChildren().isEmpty())) { + this.context.groupByExpression = true; + } + TblColRef groupOutCol = inputColumnRowType.getColumnByIndex(i); if (tupleExpression instanceof ColumnTupleExpression) { this.groups.add(((ColumnTupleExpression) tupleExpression).getColumn()); diff --git a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java index e530b72..c0d010b 100644 --- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java +++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java @@ -133,6 +133,7 @@ public class OLAPContext { public boolean afterJoin = false; public boolean hasJoin = false; public boolean hasWindow = false; + public boolean groupByExpression = false; // checkout if group by column has operator // cube metadata public IRealization realization; @@ -190,7 +191,7 @@ public class OLAPContext { } } sqlDigest = new SQLDigest(firstTableScan.getTableName(), allColumns, joins, // model - groupByColumns, subqueryJoinParticipants, dynGroupBy, // group by + groupByColumns, subqueryJoinParticipants, dynGroupBy, groupByExpression, // group by metricsColumns, aggregations, aggrSqlCalls, dynFuncs, // aggregation rtDimColumns, rtMetricColumns, // runtime related columns filterColumns, filter, havingFilter, // filter