This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 1976b85fa44 [improvement](statistics)Use min row count of all replicas
as tablet/table row count. (#41894) (#41979)
1976b85fa44 is described below
commit 1976b85fa448cd74497941514bc062468301cb2e
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Oct 16 21:03:15 2024 +0800
[improvement](statistics)Use min row count of all replicas as tablet/table
row count. (#41894) (#41979)
backport: https://github.com/apache/doris/pull/41894
---
.../main/java/org/apache/doris/catalog/Tablet.java | 17 ++++++++++
.../org/apache/doris/catalog/TabletStatMgr.java | 12 ++++++--
.../apache/doris/statistics/OlapAnalysisTask.java | 3 +-
.../java/org/apache/doris/catalog/TabletTest.java | 36 ++++++++++++++++++++++
4 files changed, 65 insertions(+), 3 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index a7240895029..baf21cbeebe 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -476,6 +476,23 @@ public class Tablet extends MetaObject implements Writable
{
return singleReplica ?
Double.valueOf(s.average().orElse(0)).longValue() : s.sum();
}
+ // Get the least row count among all valid replicas that have the highest version.
+ // The replica with the least row count is the most accurate one, because it performs the most compaction.
+ public long getMinReplicaRowCount(long version) {
+ long minRowCount = Long.MAX_VALUE;
+ long maxReplicaVersion = 0;
+ for (Replica r : replicas) {
+ if (r.isAlive()
+ && r.checkVersionCatchUp(version, false)
+ && (r.getVersion() > maxReplicaVersion
+ || r.getVersion() == maxReplicaVersion &&
r.getRowCount() < minRowCount)) {
+ minRowCount = r.getRowCount();
+ maxReplicaVersion = r.getVersion();
+ }
+ }
+ return minRowCount == Long.MAX_VALUE ? 0 : minRowCount;
+ }
+
/**
* A replica is healthy only if
* 1. the backend is available
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
index 00b3bcfca87..2cbb55dca19 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
@@ -115,14 +115,17 @@ public class TabletStatMgr extends MasterDaemon {
long indexRowCount = 0L;
boolean indexReported = true;
for (Tablet tablet : index.getTablets()) {
- long tabletRowCount = 0L;
+ long tabletRowCount = Long.MAX_VALUE;
boolean tabletReported = false;
for (Replica replica : tablet.getReplicas()) {
LOG.debug("Table {} replica {} current
version {}, report version {}",
olapTable.getName(),
replica.getId(),
replica.getVersion(),
replica.getLastReportVersion());
+ // A replica with a smaller row count is more accurate than the others
+ // when the replicas' versions are identical, because a smaller row count
+ // means this replica has done more compaction than the others.
if (replica.checkVersionCatchUp(version,
false)
- && replica.getRowCount() >=
tabletRowCount) {
+ && replica.getRowCount() <
tabletRowCount) {
// 1. If replica version and reported
replica version are all equal to
// PARTITION_INIT_VERSION, set
tabletReported to true, which indicates this
// tablet is empty for sure when
previous report.
@@ -139,6 +142,11 @@ public class TabletStatMgr extends MasterDaemon {
tabletRowCount = replica.getRowCount();
}
}
+
+ // When all BEs are down, avoid adding Long.MAX_VALUE to the index/table row count. Use 0.
+ if (tabletRowCount == Long.MAX_VALUE) {
+ tabletRowCount = 0L;
+ }
indexRowCount += tabletRowCount;
// Only when all tablets of this index are
reported, we set indexReported to true.
indexReported = indexReported &&
tabletReported;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 34fb339564a..94370659b34 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -284,7 +284,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
int seekTid = (int) ((i + seek) % ids.size());
long tabletId = ids.get(seekTid);
sampleTabletIds.add(tabletId);
- actualSampledRowCount +=
materializedIndex.getTablet(tabletId).getRowCount(true);
+ actualSampledRowCount += materializedIndex.getTablet(tabletId)
+ .getMinReplicaRowCount(p.getVisibleVersion());
if (actualSampledRowCount >= sampleRows &&
!forPartitionColumn) {
enough = true;
break;
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
index 99769a6b525..665aaa078ed 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
@@ -213,4 +213,40 @@ public class TabletTest {
Pair.of(1L, false), Pair.of(2L, false), Pair.of(3L, false),
Pair.of(4L, true)
);
}
+
+ @Test
+ public void testGetMinReplicaRowCount() {
+ Tablet t = new Tablet(1);
+ long row = t.getMinReplicaRowCount(1);
+ Assert.assertEquals(0, row);
+
+ Replica r1 = new Replica(1, 1, 10, 0, 0, 0, 100, ReplicaState.NORMAL,
0, 10);
+ t.addReplica(r1);
+ row = t.getMinReplicaRowCount(10);
+ Assert.assertEquals(100, row);
+
+ row = t.getMinReplicaRowCount(11);
+ Assert.assertEquals(0, row);
+
+ Replica r2 = new Replica(2, 2, 10, 0, 0, 0, 110, ReplicaState.NORMAL,
0, 10);
+ Replica r3 = new Replica(3, 3, 10, 0, 0, 0, 90, ReplicaState.NORMAL,
0, 10);
+ t.addReplica(r2);
+ t.addReplica(r3);
+ row = t.getMinReplicaRowCount(11);
+ Assert.assertEquals(0, row);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(90, row);
+
+ r3.setBad(true);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(100, row);
+
+ r3.setBad(false);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(90, row);
+
+ r2.updateVersion(11);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(110, row);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]