Repository: kylin
Updated Branches:
  refs/heads/master 7c0c176b0 -> 0e7658df0
KYLIN-1544 Make hbase encoding and block size configurable just like hbase compression

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/0e7658df
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/0e7658df
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/0e7658df

Branch: refs/heads/master
Commit: 0e7658df006b01cdf9d056c6c13f375498bc17b8
Parents: 7c0c176
Author: Hongbin Ma <mahong...@apache.org>
Authored: Tue Mar 29 15:00:09 2016 +0800
Committer: Hongbin Ma <mahong...@apache.org>
Committed: Tue Mar 29 15:03:24 2016 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/KylinConfigBase.java | 8 ++++++++
 .../apache/kylin/gridtable/EmptyGTScanner.java | 8 +++++++-
 .../kylin/gridtable/GTScanRangePlanner.java | 1 +
 .../apache/kylin/gridtable/GTScanRequest.java | 18 ++++++++++--------
 .../kylin/rest/controller/ModelController.java | 1 +
 .../hbase/cube/v2/CubeHBaseEndpointRPC.java | 2 +-
 .../hbase/cube/v2/CubeSegmentScanner.java | 2 +-
 .../storage/hbase/steps/CubeHTableUtil.java | 19 ++++++++++++++++---
 8 files changed, 45 insertions(+), 14 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 0ee2bc3..6638500 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -511,6 +511,14 @@ public class KylinConfigBase implements Serializable {
     public String getHbaseDefaultCompressionCodec() {
         return getOptional("kylin.hbase.default.compression.codec", "");
     }
+ + public String getHbaseDefaultEncoding() { + return getOptional("kylin.hbase.default.encoding", "FAST_DIFF"); + } + + public String getHbaseDefaultBlockSize() { + return getOptional("kylin.hbase.default.block.size", "4194304"); + } public boolean isHiveKeepFlatTable() { return Boolean.parseBoolean(this.getOptional("kylin.hive.keep.flat.table", "false")); http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java b/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java index b3507c9..a6b6c47 100644 --- a/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java +++ b/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java @@ -22,6 +22,12 @@ import java.io.IOException; import java.util.Iterator; public class EmptyGTScanner implements IGTScanner { + private int reportScannedRowCount; + + public EmptyGTScanner(int reportScannedRowCount) { + this.reportScannedRowCount = reportScannedRowCount; + } + @Override public GTInfo getInfo() { return null; @@ -29,7 +35,7 @@ public class EmptyGTScanner implements IGTScanner { @Override public int getScannedRowCount() { - return 0; + return reportScannedRowCount; } @Override http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java index 3fb4197..2307aaf 100644 --- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java +++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java @@ -154,6 +154,7 @@ public class 
GTScanRangePlanner { logger.info("The execution of this query will not use fuzzy key"); return result; } + List<Map<Integer, ByteArray>> fuzzyValueCombinations = FuzzyValueCombination.calculate(fuzzyValueSet, MAX_HBASE_FUZZY_KEYS); http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java index 02ad820..1edfb36 100644 --- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java +++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java @@ -87,7 +87,7 @@ public class GTScanRequest { if (columns == null) columns = ImmutableBitSet.EMPTY; - + columns = columns.or(aggrGroupBy); columns = columns.or(aggrMetrics); } @@ -143,8 +143,8 @@ public class GTScanRequest { public IGTScanner decorateScanner(IGTScanner scanner, boolean doFilter, boolean doAggr) throws IOException { IGTScanner result = scanner; if (!doFilter) { //Skip reading this section if you're not profiling! - lookAndForget(result); - return new EmptyGTScanner(); + int scanned = lookAndForget(result); + return new EmptyGTScanner(scanned); } else { if (this.hasFilterPushDown()) { @@ -152,8 +152,8 @@ public class GTScanRequest { } if (!doAggr) {//Skip reading this section if you're not profiling! 
- lookAndForget(result); - return new EmptyGTScanner(); + int scanned = lookAndForget(result); + return new EmptyGTScanner(scanned); } if (this.allowPreAggregation && this.hasAggregation()) { @@ -164,9 +164,11 @@ public class GTScanRequest { } //touch every byte of the cell so that the cost of scanning will be truly reflected - private void lookAndForget(IGTScanner scanner) { + private int lookAndForget(IGTScanner scanner) { byte meaninglessByte = 0; + int scanned = 0; for (GTRecord gtRecord : scanner) { + scanned++; for (ByteArray col : gtRecord.getInternal()) { if (col != null) { int endIndex = col.offset() + col.length(); @@ -176,6 +178,8 @@ public class GTScanRequest { } } } + System.out.println("Meaningless byte is " + meaninglessByte); + return scanned; } public boolean hasFilterPushDown() { @@ -240,7 +244,6 @@ public class GTScanRequest { return "GTScanRequest [range=" + range + ", columns=" + columns + ", filterPushDown=" + filterPushDown + ", aggrGroupBy=" + aggrGroupBy + ", aggrMetrics=" + aggrMetrics + ", aggrMetricsFuncs=" + Arrays.toString(aggrMetricsFuncs) + "]"; } - public static final BytesSerializer<GTScanRequest> serializer = new BytesSerializer<GTScanRequest>() { @Override public void serialize(GTScanRequest value, ByteBuffer out) { @@ -308,5 +311,4 @@ public class GTScanRequest { }; - } http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java b/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java index 00b79b0..095a731 100644 --- a/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java +++ b/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java @@ -88,6 +88,7 @@ public class ModelController extends BasicController { if (modelDesc == null || 
StringUtils.isEmpty(modelDesc.getName())) { return modelRequest; } + if (StringUtils.isEmpty(modelDesc.getName())) { logger.info("Model name should not be empty."); throw new BadRequestException("Model name should not be empty."); http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java index bde2196..2138bcc 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java @@ -322,7 +322,7 @@ public class CubeHBaseEndpointRPC extends CubeHBaseRPC { for (Map.Entry<byte[], CubeVisitProtos.CubeVisitResponse> result : results.entrySet()) { totalScannedCount.addAndGet(result.getValue().getStats().getScannedRowCount()); - logger.info("<spawned by " + currentThreadName + ">" + getStatsString(result)); + logger.info("<spawned by " + currentThreadName + "> " + getStatsString(result)); try { epResultItr.append(CompressionUtils.decompress(HBaseZeroCopyByteString.zeroCopyGetBytes(result.getValue().getCompressedRows()))); } catch (IOException | DataFormatException e) { http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java index ea38508..b95fce9 100644 --- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java @@ -261,7 +261,7 @@ public class CubeSegmentScanner implements IGTScanner { try { if (scanRequests.size() == 0) { logger.info("Segment {} will be skipped", cubeSeg); - internal = new EmptyGTScanner(); + internal = new EmptyGTScanner(0); } else { internal = rpc.getGTScanner(scanRequests); } http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java index f2aba0a..aed28ad 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy; import org.apache.hadoop.hbase.security.User; import org.apache.kylin.common.KylinConfig; @@ -107,13 +108,25 @@ public class CubeHTableUtil { break; } default: { - logger.info("hbase will not user any compression codec to compress data"); + logger.info("hbase will not user any compression algorithm to compress data"); + cf.setCompressionType(Algorithm.NONE); } } - cf.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF); + int blockSize = Integer.valueOf(kylinConfig.getHbaseDefaultBlockSize()); + cf.setBlocksize(blockSize); + + try { + 
String encodingStr = kylinConfig.getHbaseDefaultEncoding(); + DataBlockEncoding encoding = DataBlockEncoding.valueOf(encodingStr); + cf.setDataBlockEncoding(encoding); + } catch (Exception e) { + logger.info("hbase will not user any encoding"); + cf.setDataBlockEncoding(DataBlockEncoding.NONE); + } + cf.setInMemory(false); - cf.setBlocksize(4 * 1024 * 1024); // set to 4MB + cf.setBloomFilterType(BloomType.NONE); tableDesc.addFamily(cf); }