This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 3161a42 [CARBONDATA-3918] Fix extra data in count(*) after multiple
updates and index server running
3161a42 is described below
commit 3161a42011902fd7ad4e2ee86ac818d2d554d77d
Author: akashrn5 <[email protected]>
AuthorDate: Mon Jul 20 11:22:50 2020 +0530
[CARBONDATA-3918] Fix extra data in count(*) after multiple updates and
index server running
Why is this PR needed?
Select count * gives extra data after multiple updates with the index
server running.
This happens because, once horizontal compaction runs, it stores the
index files in the cache
and creates new index and data files. So if the table has been updated or
had rows deleted, we
exclude the stale splits after getting all splits. However, when the splits
come from the index server,
loadMetadataDetails is transient in the Segment object, so its value is
null because
the splits are serialized by the index server. As a result, the old IUD
segments cannot be
filtered out, which leads to extra data in count(*).
What changes were proposed in this PR?
Once we get the splits from the index server, look up the matching segment
in validSegments, get its
loadMetadataDetails and readCommittedScope, and set them on the segment of
each split, which solves this problem.
This closes #3853
---
.../java/org/apache/carbondata/core/index/Segment.java | 4 ++++
.../carbondata/hadoop/api/CarbonTableInputFormat.java | 16 ++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/core/src/main/java/org/apache/carbondata/core/index/Segment.java
b/core/src/main/java/org/apache/carbondata/core/index/Segment.java
index e76253d..8fb22bc 100644
--- a/core/src/main/java/org/apache/carbondata/core/index/Segment.java
+++ b/core/src/main/java/org/apache/carbondata/core/index/Segment.java
@@ -309,6 +309,10 @@ public class Segment implements Serializable, Writable {
return loadMetadataDetails;
}
+ public void setLoadMetadataDetails(LoadMetadataDetails loadMetadataDetails) {
+ this.loadMetadataDetails = loadMetadataDetails;
+ }
+
public long getIndexSize() {
return indexSize;
}
diff --git
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index bd0f5d1..e61f742 100644
---
a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++
b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -356,6 +356,7 @@ public class CarbonTableInputFormat<T> extends
CarbonInputFormat<T> {
getDataBlocksOfSegment(job, carbonTable, expression, validSegments,
invalidSegments, segmentsToBeRefreshed);
numBlocks = dataBlocksOfSegment.size();
+ updateLoadMetaDataDetailsToSegments(validSegments, dataBlocksOfSegment);
for (org.apache.carbondata.hadoop.CarbonInputSplit inputSplit :
dataBlocksOfSegment) {
// Get the UpdateVO for those tables on which IUD operations being
performed.
@@ -386,6 +387,21 @@ public class CarbonTableInputFormat<T> extends
CarbonInputFormat<T> {
return result;
}
+ public void updateLoadMetaDataDetailsToSegments(List<Segment> validSegments,
+ List<org.apache.carbondata.hadoop.CarbonInputSplit> prunedSplits) {
+ for (CarbonInputSplit split : prunedSplits) {
+ Segment segment = split.getSegment();
+ if (segment.getLoadMetadataDetails() == null ||
segment.getReadCommittedScope() == null) {
+ if (validSegments.contains(segment)) {
+ segment.setLoadMetadataDetails(
+
validSegments.get(validSegments.indexOf(segment)).getLoadMetadataDetails());
+ segment.setReadCommittedScope(
+
validSegments.get(validSegments.indexOf(segment)).getReadCommittedScope());
+ }
+ }
+ }
+ }
+
/**
* return valid segment to access
*/