This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 7539ac4d5fa613ab705bc3368e78355db0ae5a8e
Author: chao long <wayn...@qq.com>
AuthorDate: Tue Dec 25 19:15:54 2018 +0800

    KYLIN-3731 Segment pruning ignore complex data type
---
 .../src/main/java/org/apache/kylin/cube/common/SegmentPruner.java  | 4 ++++
 .../src/main/java/org/apache/kylin/metadata/datatype/DataType.java | 7 +++++++
 .../apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java   | 2 +-
 .../apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java | 3 +++
 .../main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java | 2 +-
 5 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
index de77511..f3f2052 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/common/SegmentPruner.java
@@ -75,6 +75,10 @@ public class SegmentPruner {
         Map<String, DimensionRangeInfo> segDimRangInfoMap = seg.getDimensionRangeInfoMap();
         for (CompareTupleFilter comp : mustTrueCompares) {
             TblColRef col = comp.getColumn();
+
+            if (!col.getType().needCompare()) {
+                continue;
+            }
 
             DimensionRangeInfo dimRangeInfo = segDimRangInfoMap.get(col.getIdentity());
             if (dimRangeInfo == null)
diff --git a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
index e261e77..8aba152 100644
--- a/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
+++ b/core-metadata/src/main/java/org/apache/kylin/metadata/datatype/DataType.java
@@ -248,6 +248,13 @@ public class DataType implements Serializable {
         return getOrder().compare(value1, value2);
     }
 
+    public boolean needCompare() {
+        if (isComplexType(this) || isBoolean()) {
+            return false;
+        }
+        return true;
+    }
+
     private static String replaceLegacy(String str) {
         String replace = LEGACY_TYPE_MAP.get(str);
         return replace == null ? str : replace;
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index 27b5208..6848013 100755
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -160,7 +160,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK
             String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
             logAFewRows(value);
             // if dimension col, compute max/min value
-            if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col)) {
+            if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col) && col.getType().needCompare()) {
                 if (minValue == null || col.getType().compare(minValue, value) > 0) {
                     minValue = value;
                 }
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
index a84a3a1..977e310 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/UpdateCubeInfoAfterBuildStep.java
@@ -126,6 +126,9 @@ public class UpdateCubeInfoAfterBuildStep extends AbstractExecutable {
         final TblColRef partitionCol = segment.getCubeDesc().getModel().getPartitionDesc().getPartitionDateColumnRef();
 
         for (TblColRef dimColRef : segment.getCubeDesc().listDimensionColumnsExcludingDerived(true)) {
+            if (!dimColRef.getType().needCompare())
+                continue;
+
             final String factColumnsInputPath = this.getParams().get(BatchConstants.CFG_OUTPUT_PATH);
             Path colDir = new Path(factColumnsInputPath, dimColRef.getIdentity());
             FileSystem fs = HadoopUtil.getWorkingFileSystem();
diff --git a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
index cdd0ac2..92101b3 100644
--- a/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
+++ b/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
@@ -758,7 +758,7 @@ public class SparkFactDistinct extends AbstractApplication implements Serializab
                     String value = Bytes.toString(key.getBytes(), 1, key.getLength() - 1);
                     logAFewRows(value);
                     // if dimension col, compute max/min value
-                    if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col)) {
+                    if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col) && col.getType().needCompare()) {
                         if (minValue == null || col.getType().compare(minValue, value) > 0) {
                             minValue = value;
                         }