[CARBONDATA-2770][BloomDataMap] Optimize code to get blocklet id when rebuilding datamap
We should get the exact blocklet id from the blocklet scanned result instead of building it ourselves. This closes #2539 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/7dfc0efb Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/7dfc0efb Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/7dfc0efb Branch: refs/heads/branch-1.4 Commit: 7dfc0efb5e6dd1fb8f7d12eb128058e1294980ca Parents: 118f458 Author: Manhua <[email protected]> Authored: Mon Jul 23 17:03:58 2018 +0800 Committer: ravipesala <[email protected]> Committed: Tue Jul 31 00:10:41 2018 +0530 ---------------------------------------------------------------------- .../scan/collector/impl/RowIdRawBasedResultCollector.java | 2 +- .../impl/RowIdRestructureBasedRawResultCollector.java | 2 +- .../carbondata/datamap/IndexDataMapRebuildRDD.scala | 10 +--------- 3 files changed, 3 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/7dfc0efb/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRawBasedResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRawBasedResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRawBasedResultCollector.java index a68265d..f0291fa 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRawBasedResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRawBasedResultCollector.java @@ -134,7 +134,7 @@ public class RowIdRawBasedResultCollector extends AbstractScannedResultCollector // 3 for blockletId, pageId, rowId Object[] row = new Object[1 + queryMeasures.length + 3]; scannedResult.incrementCounter(); - row[1 + queryMeasures.length] = 
scannedResult.getBlockletNumber(); + row[1 + queryMeasures.length] = Integer.parseInt(scannedResult.getBlockletNumber()); row[1 + queryMeasures.length + 1] = scannedResult.getCurrentPageCounter(); ByteArrayWrapper wrapper = new ByteArrayWrapper(); wrapper.setDictionaryKey(dictionaryKeyArrayBatch.get(i)); http://git-wip-us.apache.org/repos/asf/carbondata/blob/7dfc0efb/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRestructureBasedRawResultCollector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRestructureBasedRawResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRestructureBasedRawResultCollector.java index 28e778f..e343dea 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRestructureBasedRawResultCollector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RowIdRestructureBasedRawResultCollector.java @@ -106,7 +106,7 @@ public class RowIdRestructureBasedRawResultCollector extends RestructureBasedRaw // 3 for blockletId, pageId, rowId Object[] row = new Object[1 + queryMeasures.length + 3]; scannedResult.incrementCounter(); - row[1 + queryMeasures.length] = scannedResult.getBlockletNumber(); + row[1 + queryMeasures.length] = Integer.parseInt(scannedResult.getBlockletNumber()); row[1 + queryMeasures.length + 1] = scannedResult.getCurrentPageCounter(); ByteArrayWrapper wrapper = new ByteArrayWrapper(); wrapper.setDictionaryKey(dictionaryKeyArrayBatch.get(i)); http://git-wip-us.apache.org/repos/asf/carbondata/blob/7dfc0efb/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala 
b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala index 29df1ca..e4d5b26 100644 --- a/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala +++ b/integration/spark2/src/main/scala/org/apache/carbondata/datamap/IndexDataMapRebuildRDD.scala @@ -357,21 +357,13 @@ class IndexDataMapRebuildRDD[K, V]( // skip clear datamap and we will do this adter rebuild reader.setSkipClearDataMapAtClose(true) - var blockletId = 0 - var firstRow = true while (reader.nextKeyValue()) { val rowWithPosition = reader.getCurrentValue val size = rowWithPosition.length + val blockletId = rowWithPosition(size - 3).asInstanceOf[Int] val pageId = rowWithPosition(size - 2).asInstanceOf[Int] val rowId = rowWithPosition(size - 1).asInstanceOf[Int] - if (!firstRow && pageId == 0 && rowId == 0) { - // new blocklet started, increase blockletId - blockletId = blockletId + 1 - } else { - firstRow = false - } - refresher.addRow(blockletId, pageId, rowId, rowWithPosition) }
