This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d9fbbe  KYLIN-3416 Group by with expression can not aggregate exactly
0d9fbbe is described below

commit 0d9fbbee85c6a5415639cd59ba5ddacbd5056537
Author: chao long <wayn...@qq.com>
AuthorDate: Tue Dec 11 11:12:01 2018 +0800

    KYLIN-3416 Group by with expression can not aggregate exactly
---
 .../java/org/apache/kylin/metadata/realization/SQLDigest.java  |  4 +++-
 .../apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java  | 10 ++++++++--
 .../java/org/apache/kylin/storage/hbase/ITStorageTest.java     |  1 +
 .../java/org/apache/kylin/query/relnode/OLAPAggregateRel.java  |  7 +++++++
 .../main/java/org/apache/kylin/query/relnode/OLAPContext.java  |  3 ++-
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git 
a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
 
b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
index 0b23e48..fa7d1e5 100644
--- 
a/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
+++ 
b/core-metadata/src/main/java/org/apache/kylin/metadata/realization/SQLDigest.java
@@ -60,6 +60,7 @@ public class SQLDigest {
     public Set<TblColRef> subqueryJoinParticipants;
 
     public Map<TblColRef, TupleExpression> dynGroupbyColumns;
+    public boolean groupByExpression;
 
     // aggregation
     public Set<TblColRef> metricColumns;
@@ -85,7 +86,7 @@ public class SQLDigest {
 
     public SQLDigest(String factTable, Set<TblColRef> allColumns, 
List<JoinDesc> joinDescs, // model
             List<TblColRef> groupbyColumns, Set<TblColRef> 
subqueryJoinParticipants,
-            Map<TblColRef, TupleExpression> dynGroupByColumns, // group by
+            Map<TblColRef, TupleExpression> dynGroupByColumns, boolean 
groupByExpression, // group by
             Set<TblColRef> metricColumns, List<FunctionDesc> aggregations, 
List<SQLCall> aggrSqlCalls, // aggregation
             List<DynamicFunctionDesc> dynAggregations, //
             Set<TblColRef> rtDimensionColumns, Set<TblColRef> rtMetricColumns, 
// dynamic col related columns
@@ -101,6 +102,7 @@ public class SQLDigest {
         this.subqueryJoinParticipants = subqueryJoinParticipants;
 
         this.dynGroupbyColumns = dynGroupByColumns;
+        this.groupByExpression = groupByExpression;
 
         this.metricColumns = metricColumns;
         this.aggregations = aggregations;
diff --git 
a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
 
b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
index 68c4113..8d82873 100644
--- 
a/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
+++ 
b/core-storage/src/main/java/org/apache/kylin/storage/gtrecord/GTCubeStorageQueryBase.java
@@ -163,7 +163,7 @@ public abstract class GTCubeStorageQueryBase implements 
IStorageQuery {
 
         // exactAggregation mean: needn't aggregation at storage and query 
engine both.
         boolean exactAggregation = isExactAggregation(context, cuboid, groups, 
otherDimsD, singleValuesD,
-                derivedPostAggregation, sqlDigest.aggregations, 
sqlDigest.aggrSqlCalls);
+                derivedPostAggregation, sqlDigest.aggregations, 
sqlDigest.aggrSqlCalls, sqlDigest.groupByExpression);
         context.setExactAggregation(exactAggregation);
 
         // replace derived columns in filter with host columns; columns on 
loosened condition must be added to group by
@@ -557,7 +557,7 @@ public abstract class GTCubeStorageQueryBase implements 
IStorageQuery {
 
     private boolean isExactAggregation(StorageContext context, Cuboid cuboid, 
Collection<TblColRef> groups,
             Set<TblColRef> othersD, Set<TblColRef> singleValuesD, 
Set<TblColRef> derivedPostAggregation,
-            Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> 
aggrSQLCalls) {
+            Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> 
aggrSQLCalls, boolean groupByExpression) {
         if (context.isNeedStorageAggregation()) {
             logger.info("exactAggregation is false because need storage 
aggregation");
             return false;
@@ -605,6 +605,12 @@ public abstract class GTCubeStorageQueryBase implements 
IStorageQuery {
             }
         }
 
+        // for group by expression like: group by seller_id/100. 
seller_id_1(200) get 2, seller_id_2(201) also get 2, so can't aggregate exactly
+        if (groupByExpression) {
+            logger.info("exactAggregation is false because group by 
expression");
+            return false;
+        }
+
         logger.info("exactAggregation is true, cuboid id is {0}", 
String.valueOf(cuboid.getId()));
         return true;
     }
diff --git 
a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java 
b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
index 61aa560..3f8dccc 100644
--- a/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
+++ b/kylin-it/src/test/java/org/apache/kylin/storage/hbase/ITStorageTest.java
@@ -142,6 +142,7 @@ public class ITStorageTest extends HBaseMetadataTestCase {
             SQLDigest sqlDigest = new SQLDigest("default.test_kylin_fact", 
/*allCol*/ Collections.<TblColRef> emptySet(), /*join*/ null, //
                     groups, /*subqueryJoinParticipants*/ Sets.<TblColRef> 
newHashSet(), //
                     /*dynamicGroupByColumns*/ Collections.<TblColRef, 
TupleExpression> emptyMap(), //
+                    /*groupByExpression*/ false, //
                     /*metricCol*/ Collections.<TblColRef> emptySet(), 
aggregations, /*aggrSqlCalls*/ Collections.<SQLCall> emptyList(), //
                     /*dynamicAggregations*/ Collections.<DynamicFunctionDesc> 
emptyList(), //
                     /*runtimeDimensionColumns*/ Collections.<TblColRef> 
emptySet(), //
diff --git 
a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java 
b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
index 5ea0437..fabd854 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPAggregateRel.java
@@ -85,6 +85,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
+import static 
org.apache.kylin.metadata.expression.TupleExpression.ExpressionOperatorEnum.COLUMN;
 /**
  */
 public class OLAPAggregateRel extends Aggregate implements OLAPRel {
@@ -267,6 +268,12 @@ public class OLAPAggregateRel extends Aggregate implements 
OLAPRel {
         this.groups = Lists.newArrayList();
         for (int i = getGroupSet().nextSetBit(0); i >= 0; i = 
getGroupSet().nextSetBit(i + 1)) {
             TupleExpression tupleExpression = 
inputColumnRowType.getSourceColumnsByIndex(i);
+
+            // group by column with operator
+            if (this.context.groupByExpression == false && 
!(COLUMN.equals(tupleExpression.getOperator()) && 
tupleExpression.getChildren().isEmpty())) {
+                this.context.groupByExpression = true;
+            }
+
             TblColRef groupOutCol = inputColumnRowType.getColumnByIndex(i);
             if (tupleExpression instanceof ColumnTupleExpression) {
                 this.groups.add(((ColumnTupleExpression) 
tupleExpression).getColumn());
diff --git 
a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java 
b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
index e530b72..c0d010b 100644
--- a/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
+++ b/query/src/main/java/org/apache/kylin/query/relnode/OLAPContext.java
@@ -133,6 +133,7 @@ public class OLAPContext {
     public boolean afterJoin = false;
     public boolean hasJoin = false;
     public boolean hasWindow = false;
+    public boolean groupByExpression = false; // checkout if group by column 
has operator
 
     // cube metadata
     public IRealization realization;
@@ -190,7 +191,7 @@ public class OLAPContext {
                 }
             }
             sqlDigest = new SQLDigest(firstTableScan.getTableName(), 
allColumns, joins, // model
-                    groupByColumns, subqueryJoinParticipants, dynGroupBy, // 
group by
+                    groupByColumns, subqueryJoinParticipants, dynGroupBy, 
groupByExpression, // group by
                     metricsColumns, aggregations, aggrSqlCalls, dynFuncs, // 
aggregation
                     rtDimColumns, rtMetricColumns, // runtime related columns
                     filterColumns, filter, havingFilter, // filter

Reply via email to