Repository: carbondata
Updated Branches:
  refs/heads/carbonstore 3e1da7c3b -> 00cc601da (forced update)


[CARBONDATA-2020][Old Store Support] Add filter support for old store reading 
to improve query performance

Problem
For old stores, blocklet-level min/max comparison was not happening on the 
executor side, so all the blocklets were being scanned. This 
increased the IO and scanning time in the executor.

Solution
Modified the code to retrieve the min/max values from the blocklet node and 
use them for comparison while scanning for valid blocklets.

This closes #1818


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/d1d726a7
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/d1d726a7
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/d1d726a7

Branch: refs/heads/carbonstore
Commit: d1d726a777c8ed848d71fa05a18dfd70b05d65bd
Parents: 9550e69
Author: manishgupta88 <[email protected]>
Authored: Wed Jan 17 11:27:09 2018 +0530
Committer: ravipesala <[email protected]>
Committed: Thu Jan 18 22:30:23 2018 +0530

----------------------------------------------------------------------
 .../blockletindex/BlockletDataMap.java          |  8 +++++--
 .../BlockletDataRefNodeWrapper.java             | 19 ++++++++++++++--
 .../core/scan/filter/ColumnFilterInfo.java      | 24 ++++++++++++++++++++
 .../ImplicitIncludeFilterExecutorImpl.java      |  7 +++++-
 .../core/scan/scanner/impl/FilterScanner.java   | 18 ++++++++++++---
 5 files changed, 68 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
index ee74fad..e1fa686 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -691,8 +691,12 @@ public class BlockletDataMap implements DataMap, Cacheable 
{
       byte[][] minValue, String filePath, int blockletId) {
     BitSet bitSet = null;
     if (filterExecuter instanceof ImplicitColumnFilterExecutor) {
-      String uniqueBlockPath = 
filePath.substring(filePath.lastIndexOf("/Part") + 1)
-          + CarbonCommonConstants.FILE_SEPARATOR + blockletId;
+      String uniqueBlockPath = 
filePath.substring(filePath.lastIndexOf("/Part") + 1);
+      // this case will come in case of old store where index file does not 
contain the
+      // blocklet information
+      if (blockletId != -1) {
+        uniqueBlockPath = uniqueBlockPath + 
CarbonCommonConstants.FILE_SEPARATOR + blockletId;
+      }
       bitSet = ((ImplicitColumnFilterExecutor) filterExecuter)
           .isFilterValuesPresentInBlockOrBlocklet(maxValue, minValue, 
uniqueBlockPath);
     } else {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
index dfc8a38..1585414 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java
@@ -31,6 +31,7 @@ import 
org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkRea
 import 
org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader;
 import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
+import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex;
 
 /**
  * wrapper for blocklet data map data
@@ -91,11 +92,25 @@ public class BlockletDataRefNodeWrapper implements 
DataRefNode {
     return blockInfos.get(index).getDetailInfo().getBlockletId().toString();
   }
 
-  @Override public byte[][] getColumnsMaxValue() {
+  @Override
+  public byte[][] getColumnsMaxValue() {
+    BlockletIndex blockletIndex =
+        
blockInfos.get(index).getDetailInfo().getBlockletInfo().getBlockletIndex();
+    // In case of blocklet distribution this will be null
+    if (null != blockletIndex) {
+      return blockletIndex.getMinMaxIndex().getMaxValues();
+    }
     return null;
   }
 
-  @Override public byte[][] getColumnsMinValue() {
+  @Override
+  public byte[][] getColumnsMinValue() {
+    BlockletIndex blockletIndex =
+        
blockInfos.get(index).getDetailInfo().getBlockletInfo().getBlockletIndex();
+    // In case of blocklet distribution this will be null
+    if (null != blockletIndex) {
+      return blockletIndex.getMinMaxIndex().getMinValues();
+    }
     return null;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java
index 6a7fced..b5b6017 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/ColumnFilterInfo.java
@@ -18,7 +18,11 @@
 package org.apache.carbondata.core.scan.filter;
 
 import java.io.Serializable;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 
 public class ColumnFilterInfo implements Serializable {
 
@@ -32,6 +36,7 @@ public class ColumnFilterInfo implements Serializable {
    * Implicit column filter values to be used for block and blocklet pruning
    */
   private List<String> implicitColumnFilterList;
+  private transient Set<String> implicitDriverColumnFilterList;
   private List<Integer> excludeFilterList;
   /**
    * maintain the no dictionary filter values list.
@@ -95,4 +100,23 @@ public class ColumnFilterInfo implements Serializable {
   public void setMeasuresFilterValuesList(List<Object> 
measuresFilterValuesList) {
     this.measuresFilterValuesList = measuresFilterValuesList;
   }
+
+  public Set<String> getImplicitDriverColumnFilterList() {
+    // this list is required to be populated only n case of driver, so in 
executor this check will
+    // avoid unnecessary loading of the driver filter list
+    if (null == implicitDriverColumnFilterList) {
+      populateBlockIdListForDriverBlockPruning();
+    }
+    return implicitDriverColumnFilterList;
+  }
+
+  private void populateBlockIdListForDriverBlockPruning() {
+    implicitDriverColumnFilterList = new 
HashSet<>(implicitColumnFilterList.size());
+    String blockId = null;
+    for (String blockletId : implicitColumnFilterList) {
+      blockId =
+          blockletId.substring(0, 
blockletId.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR));
+      implicitDriverColumnFilterList.add(blockId);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java
index 5289e45..9c4c7ba 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java
@@ -70,7 +70,12 @@ public class ImplicitIncludeFilterExecutorImpl
     BitSet bitSet = new BitSet(1);
     boolean isScanRequired = false;
     String shortBlockId = CarbonTablePath.getShortBlockId(uniqueBlockPath);
-    if (dimColumnEvaluatorInfo.getFilterValues().getImplicitColumnFilterList()
+    if (uniqueBlockPath.endsWith(".carbondata")) {
+      if 
(dimColumnEvaluatorInfo.getFilterValues().getImplicitDriverColumnFilterList()
+          .contains(shortBlockId)) {
+        isScanRequired = true;
+      }
+    } else if 
(dimColumnEvaluatorInfo.getFilterValues().getImplicitColumnFilterList()
         .contains(shortBlockId)) {
       isScanRequired = true;
     }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/d1d726a7/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
 
b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
index efc0e20..79f9b49 100644
--- 
a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
+++ 
b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/FilterScanner.java
@@ -29,6 +29,7 @@ import org.apache.carbondata.core.datastore.page.ColumnPage;
 import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo;
 import 
org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException;
 import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
+import 
org.apache.carbondata.core.scan.filter.executer.ImplicitColumnFilterExecutor;
 import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
 import org.apache.carbondata.core.scan.result.AbstractScannedResult;
 import org.apache.carbondata.core.scan.result.impl.FilterQueryScannedResult;
@@ -107,9 +108,20 @@ public class FilterScanner extends AbstractBlockletScanner 
{
         totalPagesScanned.getCount() + 
blocksChunkHolder.getDataBlock().numberOfPages());
     // apply min max
     if (isMinMaxEnabled) {
-      BitSet bitSet = this.filterExecuter
-          
.isScanRequired(blocksChunkHolder.getDataBlock().getColumnsMaxValue(),
-              blocksChunkHolder.getDataBlock().getColumnsMinValue());
+      BitSet bitSet = null;
+      // check for implicit include filter instance
+      if (filterExecuter instanceof ImplicitColumnFilterExecutor) {
+        String blockletId = blockExecutionInfo.getBlockId() + 
CarbonCommonConstants.FILE_SEPARATOR
+            + blocksChunkHolder.getDataBlock().blockletId();
+        bitSet = ((ImplicitColumnFilterExecutor) filterExecuter)
+            .isFilterValuesPresentInBlockOrBlocklet(
+                blocksChunkHolder.getDataBlock().getColumnsMaxValue(),
+                blocksChunkHolder.getDataBlock().getColumnsMinValue(), 
blockletId);
+      } else {
+        bitSet = this.filterExecuter
+            
.isScanRequired(blocksChunkHolder.getDataBlock().getColumnsMaxValue(),
+                blocksChunkHolder.getDataBlock().getColumnsMinValue());
+      }
       if (bitSet.isEmpty()) {
         CarbonUtil.freeMemory(blocksChunkHolder.getDimensionRawDataChunk(),
             blocksChunkHolder.getMeasureRawDataChunk());

Reply via email to