[CARBONDATA-1291]: CarbonData query performance improvement when the number of carbon blocks is high
Limit query performance is slow when a single load has around 8400 carbondata files using the Spark distribution. The issue occurs when the number of blocks is high: for each block, the delete delta files are listed, which is an expensive operation. Solution: check for delete delta files only if IUD has been performed on the table. This closes #1324 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/940f4d5e Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/940f4d5e Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/940f4d5e Branch: refs/heads/branch-1.2 Commit: 940f4d5e62c05bd47b192690c67b4970cad38466 Parents: 642b4bf Author: kushalsaha <kushalsaha1...@gmail.com> Authored: Tue Sep 5 17:10:30 2017 +0530 Committer: Ravindra Pesala <ravi.pes...@gmail.com> Committed: Fri Sep 15 09:44:38 2017 +0530 ---------------------------------------------------------------------- .../carbondata/hadoop/CarbonInputFormat.java | 19 ++++++++++--------- .../hadoop/api/CarbonTableInputFormat.java | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java index fb3a637..4e8591e 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java @@ -460,21 +460,22 @@ public class CarbonInputFormat<T> extends FileInputFormat<Void, T> { for (DataRefNode dataRefNode : dataRefNodes) { BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode; TableBlockInfo 
tableBlockInfo = leafNode.getTableBlockInfo(); + String[] deleteDeltaFilePath = null; if (isIUDTable) { // In case IUD is not performed in this table avoid searching for // invalidated blocks. if (CarbonUtil - .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(), - invalidBlockVOForSegmentId, updateStatusManager)) { + .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(), + invalidBlockVOForSegmentId, updateStatusManager)) { continue; } - } - String[] deleteDeltaFilePath = null; - try { - deleteDeltaFilePath = - updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath()); - } catch (Exception e) { - throw new IOException(e); + // When iud is done then only get delete delta files for a block + try { + deleteDeltaFilePath = + updateStatusManager.getDeleteDeltaFilePath(tableBlockInfo.getFilePath()); + } catch (Exception e) { + throw new IOException(e); + } } result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()), tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(), http://git-wip-us.apache.org/repos/asf/carbondata/blob/940f4d5e/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java index f271517..dcc75bd 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java @@ -448,21 +448,22 @@ public class CarbonTableInputFormat<T> extends FileInputFormat<Void, T> { invalidBlockVOForSegmentId = updateStatusManager.getInvalidTimestampRange(inputSplit.getSegmentId()); } + String[] deleteDeltaFilePath = null; if (isIUDTable) { // In case IUD is not performed in this table avoid 
searching for // invalidated blocks. if (CarbonUtil - .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(), - invalidBlockVOForSegmentId, updateStatusManager)) { + .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(), + invalidBlockVOForSegmentId, updateStatusManager)) { continue; } - } - String[] deleteDeltaFilePath = null; - try { - deleteDeltaFilePath = - updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString()); - } catch (Exception e) { - throw new IOException(e); + // When iud is done then only get delete delta files for a block + try { + deleteDeltaFilePath = + updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString()); + } catch (Exception e) { + throw new IOException(e); + } } inputSplit.setDeleteDeltaFiles(deleteDeltaFilePath); result.add(inputSplit);