This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c28fe0cd1ac [fix](cloud) modify CloudTabletRebalancer and 
CloudTabletStatMgr to reduce memory (#61318)
c28fe0cd1ac is described below

commit c28fe0cd1ac7ef5d31feb1da30e7bc95753ae05c
Author: meiyi <[email protected]>
AuthorDate: Sat Mar 14 10:10:52 2026 +0800

    [fix](cloud) modify CloudTabletRebalancer and CloudTabletStatMgr to reduce 
memory (#61318)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    Reduce FE memory by:
    1. moving top-N table stats filtering from PrometheusMetricVisitor into
    CloudTabletStatMgr so it's computed once per stat cycle instead of per
    Prometheus scrape;
    2. removing the unused beToTablets field from InfightTask to avoid
    retaining a large map reference;
    3. changing InfightTablet.tabletId from Long to long to avoid boxing
    overhead.
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 .../apache/doris/catalog/CloudTabletStatMgr.java   | 31 +++++++++++++++++++++-
 .../doris/cloud/catalog/CloudTabletRebalancer.java | 13 ++++-----
 .../doris/metric/PrometheusMetricVisitor.java      | 26 +-----------------
 3 files changed, 36 insertions(+), 34 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
index 5b14786c1e7..fd500fac1a0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
@@ -35,7 +35,9 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import java.util.ArrayList;
+import java.util.Comparator;
 import java.util.List;
+import java.util.PriorityQueue;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -48,6 +50,8 @@ import java.util.concurrent.Future;
 public class CloudTabletStatMgr extends MasterDaemon {
     private static final Logger LOG = 
LogManager.getLogger(CloudTabletStatMgr.class);
 
+    private volatile long totalTableSize = 0;
+    // keep Config.prom_output_table_metrics_limit tables with the largest 
data size, used for prometheus output
     private volatile List<OlapTable.Statistics> cloudTableStatsList = new 
ArrayList<>();
 
     private static final ExecutorService GET_TABLET_STATS_THREAD_POOL = 
Executors.newFixedThreadPool(
@@ -290,7 +294,8 @@ public class CloudTabletStatMgr extends MasterDaemon {
                 newCloudTableStatsList.add(tableStats);
             }
         }
-        this.cloudTableStatsList = newCloudTableStatsList;
+        filterTopTableStatsByDataSize(newCloudTableStatsList);
+        this.totalTableSize = totalTableSize;
 
         if (MetricRepo.isInit) {
             
MetricRepo.GAUGE_MAX_TABLE_SIZE_BYTES.setValue(maxTableSize.second);
@@ -365,4 +370,28 @@ public class CloudTabletStatMgr extends MasterDaemon {
     public List<OlapTable.Statistics> getCloudTableStats() {
         return this.cloudTableStatsList;
     }
+
+    public long getTotalTableSize() {
+        return this.totalTableSize;
+    }
+
+    private void filterTopTableStatsByDataSize(List<OlapTable.Statistics> 
newCloudTableStatsList) {
+        int limit = Config.prom_output_table_metrics_limit;
+        if (limit <= 0 || newCloudTableStatsList.size() <= limit) {
+            this.cloudTableStatsList = newCloudTableStatsList;
+            return;
+        }
+        // only copy elements if number of tables > 
prom_output_table_metrics_limit
+        PriorityQueue<OlapTable.Statistics> topStats = new 
PriorityQueue<>(limit,
+                Comparator.comparingLong(OlapTable.Statistics::getDataSize));
+        for (OlapTable.Statistics stats : newCloudTableStatsList) {
+            if (topStats.size() < limit) {
+                topStats.offer(stats);
+            } else if (!topStats.isEmpty() && stats.getDataSize() > 
topStats.peek().getDataSize()) {
+                topStats.poll();
+                topStats.offer(stats);
+            }
+        }
+        this.cloudTableStatsList = new ArrayList<>(topStats);
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
index e4a92b1ed9b..50cb79fb616 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java
@@ -312,10 +312,10 @@ public class CloudTabletRebalancer extends MasterDaemon {
 
     @Getter
     private class InfightTablet {
-        private final Long tabletId;
+        private final long tabletId;
         private final String clusterId;
 
-        public InfightTablet(Long tabletId, String clusterId) {
+        public InfightTablet(long tabletId, String clusterId) {
             this.tabletId = tabletId;
             this.clusterId = clusterId;
         }
@@ -329,7 +329,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
                 return false;
             }
             InfightTablet that = (InfightTablet) o;
-            return tabletId.equals(that.tabletId) && 
clusterId.equals(that.clusterId);
+            return tabletId == that.tabletId && 
clusterId.equals(that.clusterId);
         }
 
         @Override
@@ -342,7 +342,6 @@ public class CloudTabletRebalancer extends MasterDaemon {
         public long pickedTabletId;
         public long srcBe;
         public long destBe;
-        public Map<Long, Set<Long>> beToTablets;
         public long startTimestamp;
         BalanceType balanceType;
     }
@@ -1950,8 +1949,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
                         futurePartitionToTablets, futureBeToTabletsInTable)) {
                     continue;
                 }
-                boolean moved = preheatAndUpdateTablet(pickedTabletId, srcBe, 
destBe,
-                        clusterId, balanceType, beToTablets);
+                boolean moved = preheatAndUpdateTablet(pickedTabletId, srcBe, 
destBe, clusterId, balanceType);
                 if (moved) {
                     updateBalanceStatus(balanceType);
                 }
@@ -2065,7 +2063,7 @@ public class CloudTabletRebalancer extends MasterDaemon {
     }
 
     private boolean preheatAndUpdateTablet(long pickedTabletId, long srcBe, 
long destBe, String clusterId,
-                                     BalanceType balanceType, Map<Long, 
Set<Long>> beToTablets) {
+                                     BalanceType balanceType) {
         Backend srcBackend = cloudSystemInfoService.getBackend(srcBe);
         Backend destBackend = cloudSystemInfoService.getBackend(destBe);
         if (srcBackend == null || destBackend == null) {
@@ -2079,7 +2077,6 @@ public class CloudTabletRebalancer extends MasterDaemon {
         task.srcBe = srcBe;
         task.destBe = destBe;
         task.balanceType = balanceType;
-        task.beToTablets = beToTablets;
         task.startTimestamp = System.currentTimeMillis() / 1000;
         InfightTablet key = new InfightTablet(pickedTabletId, clusterId);
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java 
b/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java
index 764002aaed0..2103dcb6c21 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/metric/PrometheusMetricVisitor.java
@@ -35,12 +35,10 @@ import org.apache.logging.log4j.Logger;
 
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.PriorityQueue;
 import java.util.Set;
 import java.util.stream.Collectors;
 
@@ -252,30 +250,8 @@ public class PrometheusMetricVisitor extends MetricVisitor 
{
         StringBuilder tableRowCountBuilder = new StringBuilder();
 
         Collection<OlapTable.Statistics> values = 
tabletStatMgr.getCloudTableStats();
-        // calc totalTableSize
-        long totalTableSize = 0;
+        long totalTableSize = tabletStatMgr.getTotalTableSize();
         for (OlapTable.Statistics stats : values) {
-            totalTableSize += stats.getDataSize();
-        }
-        // output top N metrics
-        if (values.size() > Config.prom_output_table_metrics_limit) {
-            // only copy elements if number of tables > 
prom_output_table_metrics_limit
-            PriorityQueue<OlapTable.Statistics> topStats = new PriorityQueue<>(
-                    Config.prom_output_table_metrics_limit,
-                    
Comparator.comparingLong(OlapTable.Statistics::getDataSize));
-            for (OlapTable.Statistics stats : values) {
-                if (topStats.size() < Config.prom_output_table_metrics_limit) {
-                    topStats.offer(stats);
-                } else if (!topStats.isEmpty()
-                        && stats.getDataSize() > 
topStats.peek().getDataSize()) {
-                    topStats.poll();
-                    topStats.offer(stats);
-                }
-            }
-            values = topStats;
-        }
-        for (OlapTable.Statistics stats : values) {
-
             dataSizeBuilder.append("doris_fe_table_data_size{db_name=\"");
             dataSizeBuilder.append(stats.getDbName());
             dataSizeBuilder.append("\", table_name=\"");


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to