This is an automated email from the ASF dual-hosted git repository. shaofengshi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
commit 80c60ee3142a1de56fc70b1d1a2f1cf57442fd21 Author: shaofengshi <[email protected]> AuthorDate: Fri Aug 24 08:38:03 2018 +0800 KYLIN-3491 enable shrunken gd in ci test --- .../steps/ExtractDictionaryFromGlobalMapper.java | 35 ++++++++++++++-------- .../template/cube/kylin_sales_cube.json | 8 ++--- .../template/cube/kylin_streaming_cube.json | 2 +- .../localmeta/cube_desc/ci_left_join_cube.json | 3 +- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/ExtractDictionaryFromGlobalMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/ExtractDictionaryFromGlobalMapper.java index 34a5ec7..be2549e 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/ExtractDictionaryFromGlobalMapper.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/ExtractDictionaryFromGlobalMapper.java @@ -18,13 +18,8 @@ package org.apache.kylin.engine.mr.steps; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.List; -import java.util.Set; - +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -48,8 +43,13 @@ import org.apache.kylin.engine.mr.common.BatchConstants; import org.apache.kylin.engine.mr.common.DictionaryGetterUtil; import org.apache.kylin.metadata.model.TblColRef; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Set; public class ExtractDictionaryFromGlobalMapper<KEYIN, Object> extends KylinMapper<KEYIN, Object, Text, Text> { private String cubeName; @@ -66,12 +66,13 @@ public class ExtractDictionaryFromGlobalMapper<KEYIN, Object> extends KylinMappe private List<Dictionary<String>> globalDicts; private String splitKey; + private KylinConfig config; @Override protected void doSetup(Context context) throws IOException { Configuration conf = context.getConfiguration(); bindCurrentConfiguration(conf); - KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); + config = AbstractHadoopJob.loadKylinPropsAndMetadata(); cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); cube = CubeManager.getInstance(config).getCube(cubeName); @@ -84,14 +85,12 @@ public class ExtractDictionaryFromGlobalMapper<KEYIN, Object> extends KylinMappe globalColumns = cubeDesc.getAllGlobalDictColumns(); globalColumnIndex = new int[globalColumns.size()]; globalColumnValues = Lists.newArrayListWithExpectedSize(globalColumns.size()); - globalDicts = Lists.newArrayListWithExpectedSize(globalColumns.size()); + for (int i = 0; i < globalColumns.size(); i++) { TblColRef colRef = globalColumns.get(i); int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef); globalColumnIndex[i] = columnIndexOnFlatTbl; - globalColumnValues.add(Sets.<String> newHashSet()); - globalDicts.add(cubeSeg.getDictionary(colRef)); } splitKey = DictionaryGetterUtil.getInputSplitSignature(cubeSeg, context.getInputSplit()); @@ -117,9 +116,19 @@ public class ExtractDictionaryFromGlobalMapper<KEYIN, Object> extends KylinMappe FileSystem fs = FileSystem.get(context.getConfiguration()); Path outputDirBase = new Path(context.getConfiguration().get(FileOutputFormat.OUTDIR)); + globalDicts = Lists.newArrayListWithExpectedSize(globalColumns.size()); + Map<TblColRef, Dictionary<String>> dictionaryMap = cubeSeg.buildDictionaryMap(); + for (int i = 0; i < globalColumns.size(); i++) { + TblColRef colRef = globalColumns.get(i); + globalDicts.add(dictionaryMap.get(colRef)); + } + ShrunkenDictionary.StringValueSerializer strValueSerializer = new ShrunkenDictionary.StringValueSerializer(); for (int i = 0; i < globalColumns.size(); i++) { List<String> colDistinctValues = Lists.newArrayList(globalColumnValues.get(i)); + if (colDistinctValues.size() == 0) { + continue; + } // sort values to accelerate the encoding process by reducing the swapping of global dictionary slices Collections.sort(colDistinctValues); diff --git a/examples/sample_cube/template/cube/kylin_sales_cube.json b/examples/sample_cube/template/cube/kylin_sales_cube.json index be07ffb..361a5f9 100644 --- a/examples/sample_cube/template/cube/kylin_sales_cube.json +++ b/examples/sample_cube/template/cube/kylin_sales_cube.json @@ -3,10 +3,10 @@ "last_modified" : 0, "name" : "kylin_sales_cube", - "owner" : null, - "descriptor" : "kylin_sales_cube", - "display_name" : "kylin_sales_cube", + "owner" : "ADMIN", + "descriptor" : "kylin_sales_cube", + "display_name" : "kylin_sales_cube", "status" : "DISABLED", "segments" : [ ], - "create_time_utc" : 0 + "create_time_utc" : 0 } \ No newline at end of file diff --git a/examples/sample_cube/template/cube/kylin_streaming_cube.json b/examples/sample_cube/template/cube/kylin_streaming_cube.json index e5286c3..ea7460d 100644 --- a/examples/sample_cube/template/cube/kylin_streaming_cube.json +++ b/examples/sample_cube/template/cube/kylin_streaming_cube.json @@ -2,7 +2,7 @@ "uuid": "40a27d9d-c5f3-45c4-9b8b-513552219193", "name": "kylin_streaming_cube", - "owner": null, + "owner": "ADMIN", "status": "DISABLED", "segments": [], "last_modified": 0, diff --git a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json index e42c522..363c680 100644 --- a/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json +++ b/examples/test_case_data/localmeta/cube_desc/ci_left_join_cube.json @@ -580,7 +580,8 @@ "engine_type": 2, "storage_type": 2, "override_kylin_properties": { - "kylin.cube.algorithm": "INMEM" + "kylin.cube.algorithm": "INMEM", + "kylin.dictionary.shrunken-from-global-enabled": "true" }, "snapshot_table_desc_list": [ {
