This is an automated email from the ASF dual-hosted git repository.
lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c9886c5c4c6 [improvement](statistics)Use min row count of all replicas
as tablet/table row count. (#41894)
c9886c5c4c6 is described below
commit c9886c5c4c607cd45c9df2a83e48be9f288b8112
Author: Jibing-Li <[email protected]>
AuthorDate: Wed Oct 16 19:11:34 2024 +0800
[improvement](statistics)Use min row count of all replicas as tablet/table
row count. (#41894)
Use the minimum row count among all replicas with the same version as the
tablet/table row count, because the replica with the least row count has
performed more compaction than the others. Using it as the tablet row count
is more accurate.
Meanwhile, use the minimum row count as the tablet row count when choosing
tablets during sample analysis.
---
.../main/java/org/apache/doris/catalog/Tablet.java | 17 ++++++++++
.../org/apache/doris/catalog/TabletStatMgr.java | 11 +++++--
.../apache/doris/statistics/OlapAnalysisTask.java | 5 +--
.../java/org/apache/doris/catalog/TabletTest.java | 36 ++++++++++++++++++++++
4 files changed, 65 insertions(+), 4 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
index 9fb44f07126..2dfdab1ad3d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java
@@ -528,6 +528,23 @@ public class Tablet extends MetaObject {
return singleReplica ?
Double.valueOf(s.average().orElse(0)).longValue() : s.sum();
}
+ // Get the least row count among all valid replicas.
+ // The replica with the least row count is the most accurate one. Because
it performs most compaction.
+ public long getMinReplicaRowCount(long version) {
+ long minRowCount = Long.MAX_VALUE;
+ long maxReplicaVersion = 0;
+ for (Replica r : replicas) {
+ if (r.isAlive()
+ && r.checkVersionCatchUp(version, false)
+ && (r.getVersion() > maxReplicaVersion
+ || r.getVersion() == maxReplicaVersion &&
r.getRowCount() < minRowCount)) {
+ minRowCount = r.getRowCount();
+ maxReplicaVersion = r.getVersion();
+ }
+ }
+ return minRowCount == Long.MAX_VALUE ? 0 : minRowCount;
+ }
+
/**
* A replica is healthy only if
* 1. the backend is available
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
index 18e1bfd6526..aa46c362e38 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletStatMgr.java
@@ -134,15 +134,18 @@ public class TabletStatMgr extends MasterDaemon {
Long tabletDataSize = 0L;
Long tabletRemoteDataSize = 0L;
- Long tabletRowCount = 0L;
+ Long tabletRowCount = Long.MAX_VALUE;
boolean tabletReported = false;
for (Replica replica : tablet.getReplicas()) {
LOG.debug("Table {} replica {} current
version {}, report version {}",
olapTable.getName(),
replica.getId(),
replica.getVersion(),
replica.getLastReportVersion());
+ // Replica with less row count is more
accurate than the others
+ // when replicas' version are identical.
Because less row count
+ // means this replica does more compaction
than the others.
if (replica.checkVersionCatchUp(version,
false)
- && replica.getRowCount() >=
tabletRowCount) {
+ && replica.getRowCount() <
tabletRowCount) {
// 1. If replica version and reported
replica version are all equal to
// PARTITION_INIT_VERSION, set
tabletReported to true, which indicates this
// tablet is empty for sure when
previous report.
@@ -173,6 +176,10 @@ public class TabletStatMgr extends MasterDaemon {
tableDataSize += tabletDataSize;
tableRemoteDataSize += tabletRemoteDataSize;
+ // When all BEs are down, avoid set
Long.MAX_VALUE to index and table row count. Use 0.
+ if (tabletRowCount == Long.MAX_VALUE) {
+ tabletRowCount = 0L;
+ }
tableRowCount += tabletRowCount;
indexRowCount += tabletRowCount;
// Only when all tablets of this index are
reported, we set indexReported to true.
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
index 99a29c601db..fe673ddd1ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java
@@ -177,7 +177,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
Map<String, String> params = buildSqlParams();
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
String sql = stringSubstitutor.replace(BASIC_STATS_TEMPLATE);
- ResultRow resultRow = null;
+ ResultRow resultRow;
try (AutoCloseConnectContext r =
StatisticsUtil.buildConnectContext(false)) {
stmtExecutor = new StmtExecutor(r.connectContext, sql);
resultRow = stmtExecutor.executeInternalQuery().get(0);
@@ -321,7 +321,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
int seekTid = (int) ((i + seek) % ids.size());
long tabletId = ids.get(seekTid);
sampleTabletIds.add(tabletId);
- actualSampledRowCount +=
materializedIndex.getTablet(tabletId).getRowCount(true);
+ actualSampledRowCount += materializedIndex.getTablet(tabletId)
+ .getMinReplicaRowCount(p.getVisibleVersion());
if (actualSampledRowCount >= sampleRows &&
!forPartitionColumn) {
enough = true;
break;
diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
index 413851800e3..14902f6fd94 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java
@@ -215,4 +215,40 @@ public class TabletTest {
Pair.of(1L, false), Pair.of(2L, false), Pair.of(3L, false),
Pair.of(4L, true)
);
}
+
+ @Test
+ public void testGetMinReplicaRowCount() {
+ Tablet t = new Tablet(1);
+ long row = t.getMinReplicaRowCount(1);
+ Assert.assertEquals(0, row);
+
+ Replica r1 = new Replica(1, 1, 10, 0, 0, 0, 100, ReplicaState.NORMAL,
0, 10);
+ t.addReplica(r1);
+ row = t.getMinReplicaRowCount(10);
+ Assert.assertEquals(100, row);
+
+ row = t.getMinReplicaRowCount(11);
+ Assert.assertEquals(0, row);
+
+ Replica r2 = new Replica(2, 2, 10, 0, 0, 0, 110, ReplicaState.NORMAL,
0, 10);
+ Replica r3 = new Replica(3, 3, 10, 0, 0, 0, 90, ReplicaState.NORMAL,
0, 10);
+ t.addReplica(r2);
+ t.addReplica(r3);
+ row = t.getMinReplicaRowCount(11);
+ Assert.assertEquals(0, row);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(90, row);
+
+ r3.setBad(true);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(100, row);
+
+ r3.setBad(false);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(90, row);
+
+ r2.updateVersion(11);
+ row = t.getMinReplicaRowCount(9);
+ Assert.assertEquals(110, row);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]