This is an automated email from the ASF dual-hosted git repository.

shaofengshi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git

commit 7539ac4d5fa613ab705bc3368e78355db0ae5a8e
Author: chao long <wayn...@qq.com>
AuthorDate: Tue Dec 25 19:15:54 2018 +0800

    KYLIN-3731 Segment pruning ignore complex data type
---
 .../src/main/java/org/apache/kylin/cube/common/SegmentPruner.java  | 4 ++++
 .../src/main/java/org/apache/kylin/metadata/datatype/DataType.java | 7 +++++++
 .../apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java   | 2 +-
 .../apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java | 3 +++
 .../main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java | 2 +-
 5 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
index de77511..f3f2052 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
@@ -75,6 +75,10 @@ public class SegmentPruner {
         Map<String, DimensionRangeInfo> segDimRangInfoMap = seg.getDimensionRangeInfoMap();
         for (CompareTupleFilter comp : mustTrueCompares) {
             TblColRef col = comp.getColumn();
+
+            if (!col.getType().needCompare()) {
+                continue;
+            }
             
             DimensionRangeInfo dimRangeInfo = segDimRangInfoMap.get(col.getIdentity());
             if (dimRangeInfo == null)
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
index e261e77..8aba152 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
@@ -248,6 +248,13 @@ public class DataType implements Serializable {
         return getOrder().compare(value1,  value2);
     }
 
+    public boolean needCompare() {
+        if (isComplexType(this) || isBoolean()) {
+            return false;
+        }
+        return true;
+    }
+
     private static String replaceLegacy(String str) {
         String replace = LEGACY_TYPE_MAP.get(str);
         return replace == null ? str : replace;
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 27b5208..6848013 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -160,7 +160,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
             String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
             logAFewRows(value);
             // if dimension col, compute max/min value
-            if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col)) {
+            if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col) && col.getType().needCompare()) {
                 if (minValue == null || col.getType().compare(minValue, value) > 0) {
                     minValue = value;
                 }
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
index a84a3a1..977e310 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
@@ -126,6 +126,9 @@ public class UpdateCubeInfoAfterBuildStep extends AbstractExecutable {
         final TblColRef partitionCol = segment.getCubeDesc().getModel().getPartitionDesc().getPartitionDateColumnRef();
 
         for (TblColRef dimColRef : segment.getCubeDesc().listDimensionColumnsExcludingDerived(true)) {
+            if (!dimColRef.getType().needCompare())
+                continue;
+
             final String factColumnsInputPath = this.getParams().get(BatchConstants.CFG_OUTPUT_PATH);
             Path colDir = new Path(factColumnsInputPath, dimColRef.getIdentity());
             FileSystem fs = HadoopUtil.getWorkingFileSystem();
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
index cdd0ac2..92101b3 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
@@ -758,7 +758,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
                     String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
                     logAFewRows(value);
                     // if dimension col, compute max/min value
-                    if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col)) {
+                    if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col) && col.getType().needCompare()) {
                         if (minValue == null || col.getType().compare(minValue, value) > 0) {
                             minValue = value;
                         }

Reply via email to