This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 865ec9b [CARBONDATA-4156] Fix Writing Segment Min max with all blocks
of a segment
865ec9b is described below
commit 865ec9b908783a6169d380415aa6598c41b275d5
Author: Indhumathi27 <[email protected]>
AuthorDate: Tue Mar 9 16:09:37 2021 +0530
[CARBONDATA-4156] Fix Writing Segment Min max with all blocks of a segment
Why is this PR needed?
PR-3999 has removed some code related to getting segment min max from all
blocks.
Because of this, if segment has more than one block, currently, it is
writing
min max considering one block only.
What changes were proposed in this PR?
Reverted specific code from above PR. Removed unwanted synchronization for
some methods
This closes #4101
---
.../core/segmentmeta/SegmentMetaDataInfoStats.java | 35 ++++++++++++++++------
.../carbondata/hadoop/testutil/StoreCreator.java | 3 ++
.../allqueries/TestPruneUsingSegmentMinMax.scala | 1 +
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git
a/core/src/main/java/org/apache/carbondata/core/segmentmeta/SegmentMetaDataInfoStats.java
b/core/src/main/java/org/apache/carbondata/core/segmentmeta/SegmentMetaDataInfoStats.java
index 4f11eb0..9c4ce63 100644
---
a/core/src/main/java/org/apache/carbondata/core/segmentmeta/SegmentMetaDataInfoStats.java
+++
b/core/src/main/java/org/apache/carbondata/core/segmentmeta/SegmentMetaDataInfoStats.java
@@ -22,6 +22,7 @@ import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockIndex;
import org.apache.carbondata.core.util.ByteUtil;
/**
@@ -53,7 +54,7 @@ public class SegmentMetaDataInfoStats {
* @param segmentId get corresponding segment Id from map
* @return segmentMetaDataInfo for the corresponding segment
*/
- public synchronized SegmentMetaDataInfo getTableSegmentMetaDataInfo(String
tableName,
+ public SegmentMetaDataInfo getTableSegmentMetaDataInfo(String tableName,
String segmentId) {
Map<String, SegmentColumnMetaDataInfo> segmentColumnMetaDataInfoMap = new
LinkedHashMap<>();
Map<String, BlockColumnMetaDataInfo> segmentMetaDataInfoMap =
@@ -85,14 +86,30 @@ public class SegmentMetaDataInfoStats {
public synchronized void setBlockMetaDataInfo(String tableName, String
segmentId,
BlockColumnMetaDataInfo currentBlockColumnMetaInfo) {
// check if tableName is present in tableSegmentMetaDataInfoMap
- Map<String, BlockColumnMetaDataInfo> segmentMinMaxMap = new HashMap<>();
- if (!this.tableSegmentMetaDataInfoMap.isEmpty()
- && null != this.tableSegmentMetaDataInfoMap.get(tableName)
- && !this.tableSegmentMetaDataInfoMap.get(tableName).isEmpty()) {
- segmentMinMaxMap = this.tableSegmentMetaDataInfoMap.get(tableName);
+ if (!this.tableSegmentMetaDataInfoMap.isEmpty() && null !=
this.tableSegmentMetaDataInfoMap
+ .get(tableName) &&
!this.tableSegmentMetaDataInfoMap.get(tableName).isEmpty()
+ && null !=
this.tableSegmentMetaDataInfoMap.get(tableName).get(segmentId)) {
+ // get previous blockColumn metadata information
+ BlockColumnMetaDataInfo previousBlockColumnMetaInfo =
+ this.tableSegmentMetaDataInfoMap.get(tableName).get(segmentId);
+ // compare and get updated min and max values
+ byte[][] updatedMin =
BlockIndex.compareAndUpdateMinMax(previousBlockColumnMetaInfo.getMin(),
+ currentBlockColumnMetaInfo.getMin(), true);
+ byte[][] updatedMax =
BlockIndex.compareAndUpdateMinMax(previousBlockColumnMetaInfo.getMax(),
+ currentBlockColumnMetaInfo.getMax(), false);
+ // update the segment
+ this.tableSegmentMetaDataInfoMap.get(tableName).get(segmentId)
+ .setMinMax(updatedMin, updatedMax);
+ } else {
+ Map<String, BlockColumnMetaDataInfo> segmentMinMaxMap = new HashMap<>();
+ if (!this.tableSegmentMetaDataInfoMap.isEmpty()
+ && null != this.tableSegmentMetaDataInfoMap.get(tableName)
+ && !this.tableSegmentMetaDataInfoMap.get(tableName).isEmpty()) {
+ segmentMinMaxMap = this.tableSegmentMetaDataInfoMap.get(tableName);
+ }
+ segmentMinMaxMap.put(segmentId, currentBlockColumnMetaInfo);
+ this.tableSegmentMetaDataInfoMap.put(tableName, segmentMinMaxMap);
}
- segmentMinMaxMap.put(segmentId, currentBlockColumnMetaInfo);
- this.tableSegmentMetaDataInfoMap.put(tableName, segmentMinMaxMap);
}
/**
@@ -112,7 +129,7 @@ public class SegmentMetaDataInfoStats {
/**
* This method will do min/max comparison of values and update if required
*/
- public synchronized byte[] compareAndUpdateMinMax(byte[] minMaxValueCompare1,
+ public byte[] compareAndUpdateMinMax(byte[] minMaxValueCompare1,
byte[] minMaxValueCompare2, boolean isMinValueComparison) {
// Compare and update min max values
byte[] updatedMinMaxValues = new byte[minMaxValueCompare1.length];
diff --git
a/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
b/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
index 7c1d04f..db4f30c 100644
---
a/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
+++
b/hadoop/src/main/java/org/apache/carbondata/hadoop/testutil/StoreCreator.java
@@ -51,6 +51,7 @@ import
org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.TableInfo;
import org.apache.carbondata.core.metadata.schema.table.TableSchema;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfoStats;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.statusmanager.SegmentStatus;
import org.apache.carbondata.core.util.CarbonProperties;
@@ -357,6 +358,8 @@ public class StoreCreator {
writeLoadMetadata(
loadModel.getCarbonDataLoadSchema(), loadModel.getTableName(),
loadModel.getTableName(),
new ArrayList<>());
+ SegmentMetaDataInfoStats.getInstance()
+ .clear(loadModel.getTableName(), loadModel.getSegmentId());
}
public static void writeLoadMetadata(CarbonDataLoadSchema schema, String
databaseName,
diff --git
a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestPruneUsingSegmentMinMax.scala
b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestPruneUsingSegmentMinMax.scala
index 0d0a72f..1077368 100644
---
a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestPruneUsingSegmentMinMax.scala
+++
b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/allqueries/TestPruneUsingSegmentMinMax.scala
@@ -61,6 +61,7 @@ class TestPruneUsingSegmentMinMax extends QueryTest with
BeforeAndAfterAll {
test("test if matched segment is only loaded to cache after drop column") {
createTablesAndLoadData
checkAnswer(sql("select * from carbon where a=1"), sql("select * from
parquet where a=1"))
+ checkAnswer(sql("select * from carbon where a=2"), sql("select * from
parquet where a=2"))
var showCache = sql("show metacache on table carbon").collect()
assert(showCache(0).get(2).toString.equalsIgnoreCase("1/3 index files
cached"))
checkAnswer(sql("select * from carbon where a=5"), sql("select * from
parquet where a=5"))