This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new cb89af49e7 [improvement](replica) donot care last failed version in
publish (#21001)
cb89af49e7 is described below
commit cb89af49e7556da8507c11c2103f11ea70223c0d
Author: Yongqiang YANG <[email protected]>
AuthorDate: Tue Jun 20 15:57:54 2023 +0800
[improvement](replica) donot care last failed version in publish (#21001)
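When publishing, we only care about two things:
1. Whether the replica acknowledged the publish successfully
2. Whether the replica has caught up to the partition's visible version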
---
.../doris/transaction/DatabaseTransactionMgr.java | 26 +++++++++-------------
1 file changed, 10 insertions(+), 16 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
index 5ac67993e4..eb69eab8b7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java
@@ -938,19 +938,14 @@ public class DatabaseTransactionMgr {
for (Tablet tablet : index.getTablets()) {
int healthReplicaNum = 0;
for (Replica replica : tablet.getReplicas()) {
- if (replica.getLastFailedVersion() >= 0) {
- LOG.info("publish version failed for
transaction {} on tablet {},"
- + " on replica {} due to
lastFailedVersion >= 0",
- transactionState, tablet,
replica);
- continue;
- }
if
(!errorReplicaIds.contains(replica.getId())) {
if
(replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
++healthReplicaNum;
} else {
- LOG.info("publish version failed for
transaction {} on tablet {},"
+ LOG.info("publish version {} failed
for transaction {} on tablet {},"
+ " on replica {} due to not
catchup",
- transactionState, tablet,
replica);
+
partitionCommitInfo.getVersion(), transactionState, tablet,
+ replica);
}
} else if (replica.getVersion() >=
partitionCommitInfo.getVersion()) {
// the replica's version is larger than or
equal to current transaction
@@ -959,15 +954,17 @@ public class DatabaseTransactionMgr {
errorReplicaIds.remove(replica.getId());
++healthReplicaNum;
} else {
- LOG.info("publish version failed for
transaction {} on tablet {},"
- + " on replica {} due to version
hole", transactionState, tablet, replica);
+ LOG.info("publish version {} failed for
transaction {} on tablet {},"
+ + " on replica {} due to version
hole or error",
+ partitionCommitInfo.getVersion(),
transactionState, tablet, replica);
}
}
if (healthReplicaNum < quorumReplicaNum) {
- LOG.info("publish version failed for
transaction {} on tablet {},"
+ LOG.info("publish version {} failed for
transaction {} on tablet {},"
+ " with only {} replicas less
than quorum {}",
- transactionState, tablet,
healthReplicaNum, quorumReplicaNum);
+ partitionCommitInfo.getVersion(),
transactionState, tablet, healthReplicaNum,
+ quorumReplicaNum);
String errMsg = String.format("publish on
tablet %d failed."
+ " succeed replica num %d
less than quorum %d."
+ " table: %d, partition: %d,
publish version: %d",
@@ -1637,10 +1634,7 @@ public class DatabaseTransactionMgr {
long newVersion = newCommitVersion;
long lastSuccessVersion =
replica.getLastSuccessVersion();
if (!errorReplicaIds.contains(replica.getId())) {
- if (replica.getLastFailedVersion() > 0) {
- // if the replica is a failed replica,
then not changing version
- newVersion = replica.getVersion();
- } else if
(!replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
+ if
(!replica.checkVersionCatchUp(partition.getVisibleVersion(), true)) {
// this means the replica has error in the
past, but we did not observe it
// during upgrade, one job maybe in quorum
finished state, for example,
// A,B,C 3 replica A,B 's version is 10,
C's version is 10 but C' 10 is abnormal
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]