shaofengshi closed pull request #147: KYLIN-3161 Enforce global dictionary for 
bitmap count distinct column…
URL: https://github.com/apache/kylin/pull/147
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
index df1316d3fe..8f73ffb31d 100644
--- 
a/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
@@ -19,8 +19,10 @@
 package org.apache.kylin.cube.model.validation.rule;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.commons.lang.StringUtils;
@@ -31,6 +33,8 @@
 import org.apache.kylin.cube.model.validation.ResultLevel;
 import org.apache.kylin.cube.model.validation.ValidateContext;
 import org.apache.kylin.dict.GlobalDictionaryBuilder;
+import org.apache.kylin.measure.bitmap.BitmapMeasureType;
+import org.apache.kylin.metadata.model.MeasureDesc;
 import org.apache.kylin.metadata.model.TblColRef;
 
 /**
@@ -48,6 +52,8 @@
     static final String ERROR_REUSE_BUILDER_BOTH_EMPTY = "REUSE and BUILDER 
both empty on dictionary for column: ";
     static final String ERROR_TRANSITIVE_REUSE = "Transitive REUSE is not 
allowed for dictionary: ";
     static final String ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE = "If one column 
is used for both dimension and precisely count distinct measure, its dimension 
encoding should not be dict: ";
+    static final String ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE = "For 
bitmap based count distinct column (as the data type is not int), a Global 
dictionary is required: ";
+    static final String ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE = 
"If one bitmap based count distinct column (as the data type is not int) REUSE 
another column, a Global dictionary is required: ";
 
     @Override
     public void validate(CubeDesc cubeDesc, ValidateContext context) {
@@ -60,8 +66,13 @@ public void validate(CubeDesc cubeDesc, ValidateContext 
context) {
         }
 
         Set<TblColRef> allDictCols = new HashSet<>();
-        Set<TblColRef> baseCols = new HashSet<>(); // col with builder
+        Map<TblColRef, DictionaryDesc> baseCols = new HashMap<>(); // col with 
builder
         List<DictionaryDesc> reuseDictionaries = new ArrayList<>();
+        Map<TblColRef, MeasureDesc> bitmapMeasures = new HashMap<>();
+        for (MeasureDesc measureDesc : cubeDesc.getMeasures()){
+            if (measureDesc.getFunction().getMeasureType() instanceof 
BitmapMeasureType)
+                
bitmapMeasures.put(measureDesc.getFunction().getParameter().getColRef(), 
measureDesc);
+        }
 
         // first pass
         for (DictionaryDesc dictDesc : dictDescs) {
@@ -89,17 +100,32 @@ public void validate(CubeDesc cubeDesc, ValidateContext 
context) {
                 return;
             }
 
+            if (StringUtils.isNotEmpty(builderClass) && 
!builderClass.equalsIgnoreCase(GlobalDictionaryBuilder.class.getName()) && 
bitmapMeasures.containsKey(dictCol) && 
!dictCol.getColumnDesc().getType().isIntegerFamily()){
+                context.addResult(ResultLevel.ERROR, 
ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE + dictCol);
+                return;
+            }
+
             if (reuseCol != null) {
                 reuseDictionaries.add(dictDesc);
             } else {
-                baseCols.add(dictCol);
+                baseCols.put(dictCol, dictDesc);
             }
         }
 
         // second pass: check no transitive reuse
         for (DictionaryDesc dictDesc : reuseDictionaries) {
-            if (!baseCols.contains(dictDesc.getResuseColumnRef())) {
-                context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE + 
dictDesc.getColumnRef());
+            TblColRef dictCol = dictDesc.getColumnRef();
+
+            if (!baseCols.containsKey(dictDesc.getResuseColumnRef())) {
+                context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE + 
dictCol);
+                return;
+            }
+
+            TblColRef reuseCol = dictDesc.getResuseColumnRef();
+            String reuseBuilderClass = 
baseCols.get(reuseCol).getBuilderClass();
+
+            if (bitmapMeasures.containsKey(dictCol) && 
!dictCol.getColumnDesc().getType().isIntegerFamily() && 
!reuseBuilderClass.equalsIgnoreCase(GlobalDictionaryBuilder.class.getName())){
+                context.addResult(ResultLevel.ERROR, 
ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE + dictCol);
                 return;
             }
         }
diff --git 
a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
 
b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
index 0dd9b76969..b4ecb013c3 100644
--- 
a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
+++ 
b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
@@ -19,9 +19,11 @@
 package org.apache.kylin.cube.model.validation.rule;
 
 import static 
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_DUPLICATE_DICTIONARY_COLUMN;
+import static 
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE;
 import static 
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE;
 import static 
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_BUILDER_BOTH_EMPTY;
 import static 
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_BUILDER_BOTH_SET;
+import static 
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE;
 import static 
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_TRANSITIVE_REUSE;
 import static org.junit.Assert.assertTrue;
 
@@ -38,6 +40,7 @@
 import org.apache.kylin.cube.model.DictionaryDesc;
 import org.apache.kylin.cube.model.validation.ValidateContext;
 import org.apache.kylin.dict.GlobalDictionaryBuilder;
+import org.apache.kylin.dict.global.SegmentAppendTrieDictBuilder;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -101,6 +104,19 @@ public void testBadDesc5() throws IOException {
                 DictionaryDesc.create("CATEG_LVL2_NAME", null, 
GlobalDictionaryBuilder.class.getName()));
     }
 
+    @Test
+    public void testBadDesc6() throws IOException {
+        testDictionaryDesc(ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE,
+                DictionaryDesc.create("TEST_COUNT_DISTINCT_BITMAP", null, 
SegmentAppendTrieDictBuilder.class.getName()));
+    }
+
+    @Test
+    public void testBadDesc7() throws IOException {
+        testDictionaryDesc(ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE,
+                DictionaryDesc.create("SELLER_ID", null, 
SegmentAppendTrieDictBuilder.class.getName()),
+                DictionaryDesc.create("TEST_COUNT_DISTINCT_BITMAP", 
"SELLER_ID", null));
+    }
+
     @Test
     public void testGoodDesc2() throws IOException {
         testDictionaryDesc(null, DictionaryDesc.create("SELLER_ID", null, 
GlobalDictionaryBuilder.class.getName()));
diff --git 
a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json 
b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
index ec1100a1da..a1e3202690 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
@@ -323,7 +323,7 @@
   "dictionaries": [
     {
       "column": "TEST_KYLIN_FACT.TEST_COUNT_DISTINCT_BITMAP",
-      "builder": "org.apache.kylin.dict.global.SegmentAppendTrieDictBuilder"
+      "builder": "org.apache.kylin.dict.GlobalDictionaryBuilder"
     }
   ],
   "rowkey": {
diff --git 
a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
 
b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
index 25b66f2674..b035caee69 100644
--- 
a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
+++ 
b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
@@ -127,6 +127,17 @@
       "returntype" : "bitmap"
     },
     "dependent_measure_ref" : null
+  },{
+    "name": "TEST_COUNT_DISTINCT_BITMAP",
+    "function": {
+      "expression": "COUNT_DISTINCT",
+      "parameter": {
+        "type": "column",
+        "value": "TEST_COUNT_DISTINCT_BITMAP"
+      },
+      "returntype": "bitmap"
+    },
+    "dependent_measure_ref" : null
   }, {
     "name" : "SELLER_FORMAT_CNT",
     "function" : {
@@ -255,7 +266,7 @@
       "name" : "f2",
       "columns" : [ {
         "qualifier" : "m",
-        "measure_refs" : [ "seller_cnt_bitmap", "user_count_bitmap", 
"seller_format_cnt"]
+        "measure_refs" : [ "seller_cnt_bitmap", "user_count_bitmap", 
"TEST_COUNT_DISTINCT_BITMAP", "seller_format_cnt"]
       } ]
     }, {
       "name" : "f3",


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to