shaofengshi closed pull request #147: KYLIN-3161 Enforce global dictionary for
bitmap count distinct column…
URL: https://github.com/apache/kylin/pull/147
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
b/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
index df1316d3fe..8f73ffb31d 100644
---
a/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
+++
b/core-cube/src/main/java/org/apache/kylin/cube/model/validation/rule/DictionaryRule.java
@@ -19,8 +19,10 @@
package org.apache.kylin.cube.model.validation.rule;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
@@ -31,6 +33,8 @@
import org.apache.kylin.cube.model.validation.ResultLevel;
import org.apache.kylin.cube.model.validation.ValidateContext;
import org.apache.kylin.dict.GlobalDictionaryBuilder;
+import org.apache.kylin.measure.bitmap.BitmapMeasureType;
+import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.TblColRef;
/**
@@ -48,6 +52,8 @@
static final String ERROR_REUSE_BUILDER_BOTH_EMPTY = "REUSE and BUILDER
both empty on dictionary for column: ";
static final String ERROR_TRANSITIVE_REUSE = "Transitive REUSE is not
allowed for dictionary: ";
static final String ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE = "If one column
is used for both dimension and precisely count distinct measure, its dimension
encoding should not be dict: ";
+ static final String ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE = "For
bitmap based count distinct column (as the data type is not int), a Global
dictionary is required: ";
+ static final String ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE =
"If one bitmap based count distinct column (as the data type is not int) REUSE
another column, a Global dictionary is required: ";
@Override
public void validate(CubeDesc cubeDesc, ValidateContext context) {
@@ -60,8 +66,13 @@ public void validate(CubeDesc cubeDesc, ValidateContext
context) {
}
Set<TblColRef> allDictCols = new HashSet<>();
- Set<TblColRef> baseCols = new HashSet<>(); // col with builder
+ Map<TblColRef, DictionaryDesc> baseCols = new HashMap<>(); // col with
builder
List<DictionaryDesc> reuseDictionaries = new ArrayList<>();
+ Map<TblColRef, MeasureDesc> bitmapMeasures = new HashMap<>();
+ for (MeasureDesc measureDesc : cubeDesc.getMeasures()){
+ if (measureDesc.getFunction().getMeasureType() instanceof
BitmapMeasureType)
+
bitmapMeasures.put(measureDesc.getFunction().getParameter().getColRef(),
measureDesc);
+ }
// first pass
for (DictionaryDesc dictDesc : dictDescs) {
@@ -89,17 +100,32 @@ public void validate(CubeDesc cubeDesc, ValidateContext
context) {
return;
}
+ if (StringUtils.isNotEmpty(builderClass) &&
!builderClass.equalsIgnoreCase(GlobalDictionaryBuilder.class.getName()) &&
bitmapMeasures.containsKey(dictCol) &&
!dictCol.getColumnDesc().getType().isIntegerFamily()){
+ context.addResult(ResultLevel.ERROR,
ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE + dictCol);
+ return;
+ }
+
if (reuseCol != null) {
reuseDictionaries.add(dictDesc);
} else {
- baseCols.add(dictCol);
+ baseCols.put(dictCol, dictDesc);
}
}
// second pass: check no transitive reuse
for (DictionaryDesc dictDesc : reuseDictionaries) {
- if (!baseCols.contains(dictDesc.getResuseColumnRef())) {
- context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE +
dictDesc.getColumnRef());
+ TblColRef dictCol = dictDesc.getColumnRef();
+
+ if (!baseCols.containsKey(dictDesc.getResuseColumnRef())) {
+ context.addResult(ResultLevel.ERROR, ERROR_TRANSITIVE_REUSE +
dictCol);
+ return;
+ }
+
+ TblColRef reuseCol = dictDesc.getResuseColumnRef();
+ String reuseBuilderClass =
baseCols.get(reuseCol).getBuilderClass();
+
+ if (bitmapMeasures.containsKey(dictCol) &&
!dictCol.getColumnDesc().getType().isIntegerFamily() &&
!reuseBuilderClass.equalsIgnoreCase(GlobalDictionaryBuilder.class.getName())){
+ context.addResult(ResultLevel.ERROR,
ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE + dictCol);
return;
}
}
diff --git
a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
index 0dd9b76969..b4ecb013c3 100644
---
a/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
+++
b/core-cube/src/test/java/org/apache/kylin/cube/model/validation/rule/DictionaryRuleTest.java
@@ -19,9 +19,11 @@
package org.apache.kylin.cube.model.validation.rule;
import static
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_DUPLICATE_DICTIONARY_COLUMN;
+import static
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE;
import static
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_GLOBAL_DICTIONNARY_ONLY_MEASURE;
import static
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_BUILDER_BOTH_EMPTY;
import static
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_BUILDER_BOTH_SET;
+import static
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE;
import static
org.apache.kylin.cube.model.validation.rule.DictionaryRule.ERROR_TRANSITIVE_REUSE;
import static org.junit.Assert.assertTrue;
@@ -38,6 +40,7 @@
import org.apache.kylin.cube.model.DictionaryDesc;
import org.apache.kylin.cube.model.validation.ValidateContext;
import org.apache.kylin.dict.GlobalDictionaryBuilder;
+import org.apache.kylin.dict.global.SegmentAppendTrieDictBuilder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -101,6 +104,19 @@ public void testBadDesc5() throws IOException {
DictionaryDesc.create("CATEG_LVL2_NAME", null,
GlobalDictionaryBuilder.class.getName()));
}
+ @Test
+ public void testBadDesc6() throws IOException {
+ testDictionaryDesc(ERROR_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE,
+ DictionaryDesc.create("TEST_COUNT_DISTINCT_BITMAP", null,
SegmentAppendTrieDictBuilder.class.getName()));
+ }
+
+ @Test
+ public void testBadDesc7() throws IOException {
+ testDictionaryDesc(ERROR_REUSE_GLOBAL_DICTIONNARY_FOR_BITMAP_MEASURE,
+ DictionaryDesc.create("SELLER_ID", null,
SegmentAppendTrieDictBuilder.class.getName()),
+ DictionaryDesc.create("TEST_COUNT_DISTINCT_BITMAP",
"SELLER_ID", null));
+ }
+
@Test
public void testGoodDesc2() throws IOException {
testDictionaryDesc(null, DictionaryDesc.create("SELLER_ID", null,
GlobalDictionaryBuilder.class.getName()));
diff --git
a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
index ec1100a1da..a1e3202690 100644
--- a/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
+++ b/examples/test_case_data/localmeta/cube_desc/ci_inner_join_cube.json
@@ -323,7 +323,7 @@
"dictionaries": [
{
"column": "TEST_KYLIN_FACT.TEST_COUNT_DISTINCT_BITMAP",
- "builder": "org.apache.kylin.dict.global.SegmentAppendTrieDictBuilder"
+ "builder": "org.apache.kylin.dict.GlobalDictionaryBuilder"
}
],
"rowkey": {
diff --git
a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
index 25b66f2674..b035caee69 100644
---
a/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
+++
b/examples/test_case_data/localmeta/cube_desc/test_kylin_cube_without_slr_left_join_desc.json
@@ -127,6 +127,17 @@
"returntype" : "bitmap"
},
"dependent_measure_ref" : null
+ },{
+ "name": "TEST_COUNT_DISTINCT_BITMAP",
+ "function": {
+ "expression": "COUNT_DISTINCT",
+ "parameter": {
+ "type": "column",
+ "value": "TEST_COUNT_DISTINCT_BITMAP"
+ },
+ "returntype": "bitmap"
+ },
+ "dependent_measure_ref" : null
}, {
"name" : "SELLER_FORMAT_CNT",
"function" : {
@@ -255,7 +266,7 @@
"name" : "f2",
"columns" : [ {
"qualifier" : "m",
- "measure_refs" : [ "seller_cnt_bitmap", "user_count_bitmap",
"seller_format_cnt"]
+ "measure_refs" : [ "seller_cnt_bitmap", "user_count_bitmap",
"TEST_COUNT_DISTINCT_BITMAP", "seller_format_cnt"]
} ]
}, {
"name" : "f3",
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services