Repository: kylin
Updated Branches:
  refs/heads/master 7c0c176b0 -> 0e7658df0


KYLIN-1544 Make hbase encoding and block size configurable just like hbase compression


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/0e7658df
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/0e7658df
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/0e7658df

Branch: refs/heads/master
Commit: 0e7658df006b01cdf9d056c6c13f375498bc17b8
Parents: 7c0c176
Author: Hongbin Ma <mahong...@apache.org>
Authored: Tue Mar 29 15:00:09 2016 +0800
Committer: Hongbin Ma <mahong...@apache.org>
Committed: Tue Mar 29 15:03:24 2016 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/KylinConfigBase.java |  8 ++++++++
 .../apache/kylin/gridtable/EmptyGTScanner.java   |  8 +++++++-
 .../kylin/gridtable/GTScanRangePlanner.java      |  1 +
 .../apache/kylin/gridtable/GTScanRequest.java    | 18 ++++++++++--------
 .../kylin/rest/controller/ModelController.java   |  1 +
 .../hbase/cube/v2/CubeHBaseEndpointRPC.java      |  2 +-
 .../hbase/cube/v2/CubeSegmentScanner.java        |  2 +-
 .../storage/hbase/steps/CubeHTableUtil.java      | 19 ++++++++++++++++---
 8 files changed, 45 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git 
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java 
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 0ee2bc3..6638500 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -511,6 +511,14 @@ public class KylinConfigBase implements Serializable {
     public String getHbaseDefaultCompressionCodec() {
         return getOptional("kylin.hbase.default.compression.codec", "");
     }
+    
+    public String getHbaseDefaultEncoding() {
+        return getOptional("kylin.hbase.default.encoding", "FAST_DIFF");
+    }
+
+    public String getHbaseDefaultBlockSize() {
+        return getOptional("kylin.hbase.default.block.size", "4194304");
+    }
 
     public boolean isHiveKeepFlatTable() {
         return 
Boolean.parseBoolean(this.getOptional("kylin.hive.keep.flat.table", "false"));

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java 
b/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java
index b3507c9..a6b6c47 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/EmptyGTScanner.java
@@ -22,6 +22,12 @@ import java.io.IOException;
 import java.util.Iterator;
 
 public class EmptyGTScanner implements IGTScanner {
+    private int reportScannedRowCount;
+
+    public EmptyGTScanner(int reportScannedRowCount) {
+        this.reportScannedRowCount = reportScannedRowCount;
+    }
+
     @Override
     public GTInfo getInfo() {
         return null;
@@ -29,7 +35,7 @@ public class EmptyGTScanner implements IGTScanner {
 
     @Override
     public int getScannedRowCount() {
-        return 0;
+        return reportScannedRowCount;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java 
b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java
index 3fb4197..2307aaf 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRangePlanner.java
@@ -154,6 +154,7 @@ public class GTScanRangePlanner {
             logger.info("The execution of this query will not use fuzzy key");
             return result;
         }
+        
 
         List<Map<Integer, ByteArray>> fuzzyValueCombinations = 
FuzzyValueCombination.calculate(fuzzyValueSet, MAX_HBASE_FUZZY_KEYS);
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java 
b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java
index 02ad820..1edfb36 100644
--- a/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java
+++ b/core-cube/src/main/java/org/apache/kylin/gridtable/GTScanRequest.java
@@ -87,7 +87,7 @@ public class GTScanRequest {
 
             if (columns == null)
                 columns = ImmutableBitSet.EMPTY;
-            
+
             columns = columns.or(aggrGroupBy);
             columns = columns.or(aggrMetrics);
         }
@@ -143,8 +143,8 @@ public class GTScanRequest {
     public IGTScanner decorateScanner(IGTScanner scanner, boolean doFilter, 
boolean doAggr) throws IOException {
         IGTScanner result = scanner;
         if (!doFilter) { //Skip reading this section if you're not profiling! 
-            lookAndForget(result);
-            return new EmptyGTScanner();
+            int scanned = lookAndForget(result);
+            return new EmptyGTScanner(scanned);
         } else {
 
             if (this.hasFilterPushDown()) {
@@ -152,8 +152,8 @@ public class GTScanRequest {
             }
 
             if (!doAggr) {//Skip reading this section if you're not profiling! 
-                lookAndForget(result);
-                return new EmptyGTScanner();
+                int scanned = lookAndForget(result);
+                return new EmptyGTScanner(scanned);
             }
 
             if (this.allowPreAggregation && this.hasAggregation()) {
@@ -164,9 +164,11 @@ public class GTScanRequest {
     }
 
     //touch every byte of the cell so that the cost of scanning will be truly 
reflected
-    private void lookAndForget(IGTScanner scanner) {
+    private int lookAndForget(IGTScanner scanner) {
         byte meaninglessByte = 0;
+        int scanned = 0;
         for (GTRecord gtRecord : scanner) {
+            scanned++;
             for (ByteArray col : gtRecord.getInternal()) {
                 if (col != null) {
                     int endIndex = col.offset() + col.length();
@@ -176,6 +178,8 @@ public class GTScanRequest {
                 }
             }
         }
+        System.out.println("Meaningless byte is " + meaninglessByte);
+        return scanned;
     }
 
     public boolean hasFilterPushDown() {
@@ -240,7 +244,6 @@ public class GTScanRequest {
         return "GTScanRequest [range=" + range + ", columns=" + columns + ", 
filterPushDown=" + filterPushDown + ", aggrGroupBy=" + aggrGroupBy + ", 
aggrMetrics=" + aggrMetrics + ", aggrMetricsFuncs=" + 
Arrays.toString(aggrMetricsFuncs) + "]";
     }
 
-
     public static final BytesSerializer<GTScanRequest> serializer = new 
BytesSerializer<GTScanRequest>() {
         @Override
         public void serialize(GTScanRequest value, ByteBuffer out) {
@@ -308,5 +311,4 @@ public class GTScanRequest {
 
     };
 
-
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java
----------------------------------------------------------------------
diff --git 
a/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java 
b/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java
index 00b79b0..095a731 100644
--- a/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java
+++ b/server/src/main/java/org/apache/kylin/rest/controller/ModelController.java
@@ -88,6 +88,7 @@ public class ModelController extends BasicController {
         if (modelDesc == null || StringUtils.isEmpty(modelDesc.getName())) {
             return modelRequest;
         }
+        
         if (StringUtils.isEmpty(modelDesc.getName())) {
             logger.info("Model name should not be empty.");
             throw new BadRequestException("Model name should not be empty.");

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java
----------------------------------------------------------------------
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java
index bde2196..2138bcc 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeHBaseEndpointRPC.java
@@ -322,7 +322,7 @@ public class CubeHBaseEndpointRPC extends CubeHBaseRPC {
 
                         for (Map.Entry<byte[], 
CubeVisitProtos.CubeVisitResponse> result : results.entrySet()) {
                             
totalScannedCount.addAndGet(result.getValue().getStats().getScannedRowCount());
-                            logger.info("<spawned by " + currentThreadName + 
">" + getStatsString(result));
+                            logger.info("<spawned by " + currentThreadName + 
"> " + getStatsString(result));
                             try {
                                 
epResultItr.append(CompressionUtils.decompress(HBaseZeroCopyByteString.zeroCopyGetBytes(result.getValue().getCompressedRows())));
                             } catch (IOException | DataFormatException e) {

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java
----------------------------------------------------------------------
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java
index ea38508..b95fce9 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/cube/v2/CubeSegmentScanner.java
@@ -261,7 +261,7 @@ public class CubeSegmentScanner implements IGTScanner {
             try {
                 if (scanRequests.size() == 0) {
                     logger.info("Segment {} will be skipped", cubeSeg);
-                    internal = new EmptyGTScanner();
+                    internal = new EmptyGTScanner(0);
                 } else {
                     internal = rpc.getGTScanner(scanRequests);
                 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/0e7658df/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java
----------------------------------------------------------------------
diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java
 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java
index f2aba0a..aed28ad 100644
--- 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java
+++ 
b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CubeHTableUtil.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.regionserver.BloomType;
 import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.kylin.common.KylinConfig;
@@ -107,13 +108,25 @@ public class CubeHTableUtil {
                     break;
                 }
                 default: {
-                    logger.info("hbase will not user any compression codec to 
compress data");
+                    logger.info("hbase will not user any compression algorithm 
to compress data");
+                    cf.setCompressionType(Algorithm.NONE);
                 }
                 }
 
-                cf.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
+                int blockSize = 
Integer.valueOf(kylinConfig.getHbaseDefaultBlockSize());
+                cf.setBlocksize(blockSize);
+
+                try {
+                    String encodingStr = kylinConfig.getHbaseDefaultEncoding();
+                    DataBlockEncoding encoding = 
DataBlockEncoding.valueOf(encodingStr);
+                    cf.setDataBlockEncoding(encoding);
+                } catch (Exception e) {
+                    logger.info("hbase will not user any encoding");
+                    cf.setDataBlockEncoding(DataBlockEncoding.NONE);
+                }
+
                 cf.setInMemory(false);
-                cf.setBlocksize(4 * 1024 * 1024); // set to 4MB
+                cf.setBloomFilterType(BloomType.NONE);
                 tableDesc.addFamily(cf);
             }
 

Reply via email to