Github user kunal642 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2822#discussion_r226301860
--- Diff:
core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFilterScanner.java
---
@@ -316,4 +320,167 @@ private BlockletScannedResult
executeFilter(RawBlockletColumnChunks rawBlockletC
readTime.getCount() + dimensionReadTime);
return scannedResult;
}
+
+ /**
+ * This method will process the data in below order
+ * 1. first apply min max on the filter tree and check whether any of
the filter
+ * is fall on the range of min max, if not then return empty result
+ * 2. If filter falls on min max range then apply filter on actual
+ * data and get the pruned pages.
+ * 3. if pruned pages are not empty then read only those blocks(measure
or dimension)
+ * which was present in the query but not present in the filter, as
while applying filter
+ * some of the blocks where already read and present in chunk holder so
not need to
+ * read those blocks again, this is to avoid reading of same blocks
which was already read
+ * 4. Set the blocks and filter pages to scanned result
+ *
+ * @param rawBlockletColumnChunks blocklet raw chunk of all columns
+ * @throws FilterUnsupportedException
+ */
+ private BlockletScannedResult executeFilterForPages(
+ RawBlockletColumnChunks rawBlockletColumnChunks)
+ throws FilterUnsupportedException, IOException {
+ long startTime = System.currentTimeMillis();
+ QueryStatistic totalBlockletStatistic =
queryStatisticsModel.getStatisticsTypeAndObjMap()
+ .get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM);
+
totalBlockletStatistic.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM,
+ totalBlockletStatistic.getCount() + 1);
+ // apply filter on actual data, for each page
+ BitSet pages = this.filterExecuter.prunePages(rawBlockletColumnChunks);
+ // if filter result is empty then return with empty result
+ if (pages.isEmpty()) {
+
CarbonUtil.freeMemory(rawBlockletColumnChunks.getDimensionRawColumnChunks(),
+ rawBlockletColumnChunks.getMeasureRawColumnChunks());
+
+ QueryStatistic scanTime =
queryStatisticsModel.getStatisticsTypeAndObjMap()
+ .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME);
+
scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME,
+ scanTime.getCount() + (System.currentTimeMillis() - startTime));
+
+ QueryStatistic scannedPages =
queryStatisticsModel.getStatisticsTypeAndObjMap()
+ .get(QueryStatisticsConstants.PAGE_SCANNED);
+ scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED,
+ scannedPages.getCount());
+ return createEmptyResult();
+ }
+
+ BlockletScannedResult scannedResult =
+ new FilterQueryScannedResult(blockExecutionInfo,
queryStatisticsModel);
+
+ // valid scanned blocklet
+ QueryStatistic validScannedBlockletStatistic =
queryStatisticsModel.getStatisticsTypeAndObjMap()
+ .get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM);
+ validScannedBlockletStatistic
+
.addCountStatistic(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM,
+ validScannedBlockletStatistic.getCount() + 1);
+ // adding statistics for valid number of pages
+ QueryStatistic validPages =
queryStatisticsModel.getStatisticsTypeAndObjMap()
+ .get(QueryStatisticsConstants.VALID_PAGE_SCANNED);
+
validPages.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED,
+ validPages.getCount() + pages.cardinality());
+ QueryStatistic scannedPages =
queryStatisticsModel.getStatisticsTypeAndObjMap()
+ .get(QueryStatisticsConstants.PAGE_SCANNED);
+ scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED,
+ scannedPages.getCount() + pages.cardinality());
+ // get the row indexes from bit set for each page
+ int[] pageFilteredPages = new int[pages.cardinality()];
+ int index = 0;
+ for (int i = pages.nextSetBit(0); i >= 0; i = pages.nextSetBit(i + 1))
{
+ pageFilteredPages[index++] = i;
+ }
+ // count(*) case there would not be any dimensions are measures
selected.
+ int[] numberOfRows = new int[pages.cardinality()];
+ for (int i = 0; i < numberOfRows.length; i++) {
+ numberOfRows[i] =
rawBlockletColumnChunks.getDataBlock().getPageRowCount(i);
--- End diff --
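This will fill `numberOfRows` incorrectly: the loop index `i` runs from 0 to
pages.cardinality() - 1, so getPageRowCount(i) is called with the position in
the array rather than the actual page number taken from the `pages` bit set.
I think both arrays should be filled in the same loop over the set bits:

    for (int i = pages.nextSetBit(0); i >= 0; i = pages.nextSetBit(i + 1)) {
      pageFilteredPages[index] = i;
      numberOfRows[index++] =
          rawBlockletColumnChunks.getDataBlock().getPageRowCount(i);
    }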
---