[CARBONDATA-1291]: CarbonData query performance improvement when the number of
carbon blocks is high

Limit query performance is slow when a single load has around 8400 carbondata
files and the query is run using the Spark distribution.
This issue occurs when the number of blocks is high: for each block, the delete
delta files are listed, which is an expensive operation.
Solution: look up delete delta files only when IUD (insert/update/delete) has
been performed on the table.

This closes #1324


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/940f4d5e
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/940f4d5e
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/940f4d5e

Branch: refs/heads/branch-1.2
Commit: 940f4d5e62c05bd47b192690c67b4970cad38466
Parents: 642b4bf
Author: kushalsaha <kushalsaha1...@gmail.com>
Authored: Tue Sep 5 17:10:30 2017 +0530
Committer: Ravindra Pesala <ravi.pes...@gmail.com>
Committed: Fri Sep 15 09:44:38 2017 +0530

----------------------------------------------------------------------
 .../carbondata/hadoop/CarbonInputFormat.java     | 19 ++++++++++---------
 .../hadoop/api/CarbonTableInputFormat.java       | 19 ++++++++++---------
 2 files changed, 20 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
----------------------------------------------------------------------
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
index fb3a637..4e8591e 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java
@@ -460,21 +460,22 @@ public class CarbonInputFormat<T> extends 
FileInputFormat<Void, T> {
       for (DataRefNode dataRefNode : dataRefNodes) {
         BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
         TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
+        String[] deleteDeltaFilePath = null;
         if (isIUDTable) {
           // In case IUD is not performed in this table avoid searching for
           // invalidated blocks.
           if (CarbonUtil
-              .isInvalidTableBlock(tableBlockInfo.getSegmentId(), 
tableBlockInfo.getFilePath(),
-                  invalidBlockVOForSegmentId, updateStatusManager)) {
+                  .isInvalidTableBlock(tableBlockInfo.getSegmentId(), 
tableBlockInfo.getFilePath(),
+                          invalidBlockVOForSegmentId, updateStatusManager)) {
             continue;
           }
-        }
-        String[] deleteDeltaFilePath = null;
-        try {
-          deleteDeltaFilePath =
-              
updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath());
-        } catch (Exception e) {
-          throw new IOException(e);
+          // When iud is done then only get delete delta files for a block
+          try {
+            deleteDeltaFilePath =
+                    
updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath());
+          } catch (Exception e) {
+            throw new IOException(e);
+          }
         }
         result.add(new CarbonInputSplit(segmentNo, new 
Path(tableBlockInfo.getFilePath()),
             tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(),

http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
----------------------------------------------------------------------
diff --git 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index f271517..dcc75bd 100644
--- 
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ 
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -448,21 +448,22 @@ public class CarbonTableInputFormat<T> extends 
FileInputFormat<Void, T> {
         invalidBlockVOForSegmentId =
             
updateStatusManager.getInvalidTimestampRange(inputSplit.getSegmentId());
       }
+      String[] deleteDeltaFilePath = null;
       if (isIUDTable) {
         // In case IUD is not performed in this table avoid searching for
         // invalidated blocks.
         if (CarbonUtil
-            .isInvalidTableBlock(inputSplit.getSegmentId(), 
inputSplit.getPath().toString(),
-                invalidBlockVOForSegmentId, updateStatusManager)) {
+                .isInvalidTableBlock(inputSplit.getSegmentId(), 
inputSplit.getPath().toString(),
+                        invalidBlockVOForSegmentId, updateStatusManager)) {
           continue;
         }
-      }
-      String[] deleteDeltaFilePath = null;
-      try {
-        deleteDeltaFilePath =
-            
updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString());
-      } catch (Exception e) {
-        throw new IOException(e);
+        // When iud is done then only get delete delta files for a block
+        try {
+          deleteDeltaFilePath =
+                  
updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString());
+        } catch (Exception e) {
+          throw new IOException(e);
+        }
       }
       inputSplit.setDeleteDeltaFiles(deleteDeltaFilePath);
       result.add(inputSplit);

Reply via email to