KYLIN-1307 revisit growing dictionary
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/1c5784b2 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/1c5784b2 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/1c5784b2 Branch: refs/heads/2.x-staging Commit: 1c5784b28ff9bbeb4a1ee9d4db3b05461641165d Parents: 142eaf4 Author: honma <ho...@ebay.com> Authored: Tue Jan 12 16:06:39 2016 +0800 Committer: honma <ho...@ebay.com> Committed: Wed Jan 13 17:52:57 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/KylinConfigBase.java | 5 +- .../apache/kylin/dict/DictionaryManager.java | 58 +++++++++++++++----- .../apache/kylin/rest/service/CacheService.java | 2 + 3 files changed, 51 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/1c5784b2/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index f6775e8..bfad306 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -412,7 +412,6 @@ public class KylinConfigBase implements Serializable { return Integer.parseInt(getOptional("kylin.dict.cache.max.entry", "3000")); } - public boolean getQueryRunLocalCoprocessor() { return Boolean.parseBoolean(getOptional("kylin.query.run.local.coprocessor", "false")); } @@ -453,6 +452,10 @@ public class KylinConfigBase implements Serializable { return Integer.parseInt(this.getOptional("kylin.hbase.scan.cache_rows", "1024")); } + public boolean isGrowingDictEnabled() { + return Boolean.parseBoolean(this.getOptional("kylin.dict.growing.enabled", "false")); + } + public int getHBaseScanMaxResultSize() { return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB } http://git-wip-us.apache.org/repos/asf/kylin/blob/1c5784b2/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index 630b2b2..d49e43d 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java @@ -140,26 +140,58 @@ public class DictionaryManager { initDictInfo(newDict, newDictInfo); - DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo); - if (largestDictInfo != null) { - largestDictInfo = getDictionaryInfo(largestDictInfo.getResourcePath()); - Dictionary<?> largestDictObject = largestDictInfo.getDictionaryObject(); - if (largestDictObject.contains(newDict)) { - logger.info("dictionary content " + newDict + ", is contained by dictionary at " + largestDictInfo.getResourcePath()); - return largestDictInfo; - } else if (newDict.contains(largestDictObject)) { - logger.info("dictionary content " + newDict + " is by far the largest, save it"); - return saveNewDict(newDictInfo); + if (KylinConfig.getInstanceFromEnv().isGrowingDictEnabled()) { + DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo); + if (largestDictInfo != null) { + largestDictInfo = getDictionaryInfo(largestDictInfo.getResourcePath()); + Dictionary<?> largestDictObject = largestDictInfo.getDictionaryObject(); + if (largestDictObject.contains(newDict)) { + logger.info("dictionary content " + newDict + ", is contained by dictionary at " + largestDictInfo.getResourcePath()); + return largestDictInfo; + } else if (newDict.contains(largestDictObject)) { + logger.info("dictionary content " + newDict + " is by far the largest, save it"); + return saveNewDict(newDictInfo); + } else { + logger.info("merge dict and save..."); + return mergeDictionary(Lists.newArrayList(newDictInfo, largestDictInfo)); + } } else { - logger.info("merge dict and save..."); - return mergeDictionary(Lists.newArrayList(newDictInfo, largestDictInfo)); + logger.info("first dict of this column, save it directly"); + return saveNewDict(newDictInfo); } } else { - logger.info("first dict of this column, save it directly"); + logger.info("Growing dict is not enabled"); + String dupDict = checkDupByContent(newDictInfo, newDict); + if (dupDict != null) { + logger.info("Identical dictionary content, reuse existing dictionary at " + dupDict); + return getDictionaryInfo(dupDict); + } + return saveNewDict(newDictInfo); } } + private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<?> dict) throws IOException { + ResourceStore store = MetadataManager.getInstance(config).getStore(); + ArrayList<String> existings = store.listResources(dictInfo.getResourceDir()); + if (existings == null) + return null; + + logger.info("{} existing dictionaries of the same column", existings.size()); + if (existings.size() > 100) { + logger.warn("Too many dictionaries under {}, dict count: {}", dictInfo.getResourceDir(), existings.size()); + } + + for (String existing : existings) { + DictionaryInfo existingInfo = getDictionaryInfo(existing); + if (existingInfo != null && dict.equals(existingInfo.getDictionaryObject())) { + return existing; + } + } + + return null; + } + private void initDictInfo(Dictionary<?> newDict, DictionaryInfo newDictInfo) { newDictInfo.setCardinality(newDict.getSize()); newDictInfo.setDictionaryObject(newDict); http://git-wip-us.apache.org/repos/asf/kylin/blob/1c5784b2/server/src/main/java/org/apache/kylin/rest/service/CacheService.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/kylin/rest/service/CacheService.java b/server/src/main/java/org/apache/kylin/rest/service/CacheService.java index 8227be6..c8bc941 100644 --- a/server/src/main/java/org/apache/kylin/rest/service/CacheService.java +++ b/server/src/main/java/org/apache/kylin/rest/service/CacheService.java @@ -38,6 +38,7 @@ import org.apache.kylin.common.restclient.Broadcaster; import org.apache.kylin.cube.CubeDescManager; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; +import org.apache.kylin.dict.DictionaryManager; import org.apache.kylin.engine.streaming.StreamingManager; import org.apache.kylin.invertedindex.IIDescManager; import org.apache.kylin.invertedindex.IIManager; @@ -203,6 +204,7 @@ public class CacheService extends BasicService { CubeDescManager.clearCache(); break; case ALL: + DictionaryManager.clearCache(); MetadataManager.clearCache(); CubeDescManager.clearCache(); CubeManager.clearCache();