This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 7afe606b980 [fix](race) fix access colocate group ids race #36444 (#36500)
7afe606b980 is described below
commit 7afe606b980e032d679dc8e44699bcf63c8bf5b8
Author: yujun <[email protected]>
AuthorDate: Wed Jun 19 12:21:47 2024 +0800
[fix](race) fix access colocate group ids race #36444 (#36500)
---
.../apache/doris/catalog/ColocateTableIndex.java | 2 +-
.../clone/ColocateTableCheckerAndBalancer.java | 23 +++++++++++++++++-----
2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java
index 23703278fd8..64dfb7af140 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java
@@ -391,7 +391,7 @@ public class ColocateTableIndex implements Writable {
public Set<GroupId> getAllGroupIds() {
readLock();
try {
- return group2Tables.keySet();
+ return Sets.newHashSet(group2Tables.keySet());
} finally {
readUnlock();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
index e70ec445cd6..d3141a178df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
@@ -378,7 +378,7 @@ public class ColocateTableCheckerAndBalancer extends
MasterDaemon {
* A B C D
*/
private void relocateAndBalanceGroups() {
- Set<GroupId> groupIds =
Sets.newHashSet(Env.getCurrentEnv().getColocateTableIndex().getAllGroupIds());
+ Set<GroupId> groupIds =
Env.getCurrentEnv().getColocateTableIndex().getAllGroupIds();
// balance only inside each group, excluded balance between all groups
Set<GroupId> changeGroups = relocateAndBalanceGroup(groupIds, false);
@@ -410,6 +410,10 @@ public class ColocateTableCheckerAndBalancer extends
MasterDaemon {
}
ColocateGroupSchema groupSchema =
colocateIndex.getGroupSchema(groupId);
+ if (groupSchema == null) {
+ LOG.info("Not found colocate group {}, maybe delete", groupId);
+ continue;
+ }
ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc();
try {
Env.getCurrentSystemInfo().checkReplicaAllocation(replicaAlloc);
@@ -475,12 +479,19 @@ public class ColocateTableCheckerAndBalancer extends
MasterDaemon {
// check each group
Set<GroupId> groupIds = colocateIndex.getAllGroupIds();
for (GroupId groupId : groupIds) {
+ ColocateGroupSchema groupSchema =
colocateIndex.getGroupSchema(groupId);
+ if (groupSchema == null) {
+ LOG.info("Not found colocate group {}, maybe delete", groupId);
+ continue;
+ }
+
List<Long> tableIds = colocateIndex.getAllTableIds(groupId);
List<Set<Long>> backendBucketsSeq =
colocateIndex.getBackendsPerBucketSeqSet(groupId);
if (backendBucketsSeq.isEmpty()) {
continue;
}
+ ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc();
String unstableReason = null;
OUT:
for (Long tableId : tableIds) {
@@ -499,8 +510,6 @@ public class ColocateTableCheckerAndBalancer extends
MasterDaemon {
olapTable.readLock();
try {
for (Partition partition : olapTable.getPartitions()) {
- ReplicaAllocation replicaAlloc
- =
olapTable.getPartitionInfo().getReplicaAllocation(partition.getId());
short replicationNum =
replicaAlloc.getTotalReplicaNum();
long visibleVersion = partition.getVisibleVersion();
// Here we only get VISIBLE indexes. All other indexes
are not queryable.
@@ -531,8 +540,7 @@ public class ColocateTableCheckerAndBalancer extends
MasterDaemon {
TabletSchedCtx tabletCtx = new
TabletSchedCtx(
TabletSchedCtx.Type.REPAIR,
db.getId(), tableId,
partition.getId(), index.getId(), tablet.getId(),
-
olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()),
- System.currentTimeMillis());
+ replicaAlloc,
System.currentTimeMillis());
// the tablet status will be set again
when being scheduled
tabletCtx.setTabletStatus(st);
tabletCtx.setPriority(Priority.NORMAL);
@@ -582,6 +590,7 @@ public class ColocateTableCheckerAndBalancer extends
MasterDaemon {
for (GroupId groupId : groupIds) {
ColocateGroupSchema groupSchema =
colocateIndex.getGroupSchema(groupId);
if (groupSchema == null) {
+ LOG.info("Not found colocate group {}, maybe delete", groupId);
continue;
}
ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc();
@@ -712,6 +721,10 @@ public class ColocateTableCheckerAndBalancer extends
MasterDaemon {
GlobalColocateStatistic globalColocateStatistic, List<List<Long>>
balancedBackendsPerBucketSeq,
boolean balanceBetweenGroups) {
ColocateGroupSchema groupSchema =
colocateIndex.getGroupSchema(groupId);
+ if (groupSchema == null) {
+ LOG.info("Not found colocate group {}, maybe delete", groupId);
+ return false;
+ }
short replicaNum =
groupSchema.getReplicaAlloc().getReplicaNumByTag(tag);
List<List<Long>> backendsPerBucketSeq = Lists.newArrayList(
colocateIndex.getBackendsPerBucketSeqByTag(groupId, tag));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]