This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7a3ee3640ee [fix](cloud) checkpoint save cloud tablet stats to image (#60705)
7a3ee3640ee is described below

commit 7a3ee3640ee421d6727e25daa41627b1d11d4c94
Author: meiyi <[email protected]>
AuthorDate: Sat Mar 21 02:47:46 2026 +0800

    [fix](cloud) checkpoint save cloud tablet stats to image (#60705)
---
 .../main/java/org/apache/doris/common/Config.java  |   8 ++
 .../apache/doris/catalog/CloudTabletStatMgr.java   |   7 ++
 .../apache/doris/cloud/catalog/CloudReplica.java   |   4 +
 .../java/org/apache/doris/master/Checkpoint.java   | 124 ++++++++++++++++++++-
 .../java/org/apache/doris/persist/Storage.java     |  19 ++++
 5 files changed, 159 insertions(+), 3 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 04dd9a2d610..2ad50b3a9bd 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -542,6 +542,14 @@ public class Config extends ConfigBase {
             "Randomly use V3 storage_format (ext_meta) for some tables in fuzzy tests to increase coverage"})
     public static boolean random_use_v3_storage_format = true;
 
+    @ConfField(mutable = true, masterOnly = true, description = {
+            "The stale threshold of checkpoint image file in cloud mode (in seconds). "
+                    + "If the image file is older than this threshold, a new checkpoint will be triggered "
+                    + "even if there are no new journals. This helps keep table version, partition version, "
+                    + "and tablet stats in the image up-to-date. If the value is less than or equal to 0, "
+                    + "this feature is disabled."})
+    public static long cloud_checkpoint_image_stale_threshold_seconds = 3600;
+
     @ConfField(mutable = true, masterOnly = true, description = {
             "Wait for the internal batch to be written before returning; "
                     + "insert into and stream load use group commit by default."})
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
index d6f0f9516a6..18da6784acf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CloudTabletStatMgr.java
@@ -108,6 +108,13 @@ public class CloudTabletStatMgr extends MasterDaemon {
 
     @Override
     protected void runAfterCatalogReady() {
+        if (cloudTableStatsList.isEmpty()) {
+            // use tablet stats loaded from image to update table stats when fe start
+            // avoid that the table stats is empty for a long time since getAllTabletStats may consume a long time
+            LOG.info("cloud tablet stat is empty, will update stat info of all tables");
+            updateStatInfo(Env.getCurrentInternalCatalog().getDbIds());
+        }
+
         int version = Config.cloud_get_tablet_stats_version;
         LOG.info("cloud tablet stat begin with version: {}", version);
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java
index 3a9699e8f84..c50b5d80923 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java
@@ -72,6 +72,7 @@ public class CloudReplica extends Replica implements GsonPostProcessable {
     // last time to get tablet stats
     @Getter
     @Setter
+    @SerializedName(value = "gst")
     long lastGetTabletStatsTime = 0;
     /**
      * The index of {@link org.apache.doris.catalog.CloudTabletStatMgr#DEFAULT_INTERVAL_LADDER_MS} array.
@@ -82,9 +83,12 @@ public class CloudReplica extends Replica implements GsonPostProcessable {
      */
     @Getter
     @Setter
+    @SerializedName(value = "sii")
     int statsIntervalIndex = 0;
 
+    @SerializedName(value = "sc")
     private long segmentCount = 0L;
+    @SerializedName(value = "rsc")
     private long rowsetCount = 1L; // [0-1] rowset
 
     private static final Random rand = new Random();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
index ec4126aa86d..0d21fa8094c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java
@@ -17,7 +17,16 @@
 
 package org.apache.doris.master;
 
+import org.apache.doris.catalog.Database;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.MaterializedIndex;
+import org.apache.doris.catalog.MaterializedIndex.IndexExtState;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Partition;
+import org.apache.doris.catalog.Replica;
+import org.apache.doris.catalog.Table;
+import org.apache.doris.catalog.Tablet;
+import org.apache.doris.cloud.catalog.CloudReplica;
 import org.apache.doris.common.CheckpointException;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.FeConstants;
@@ -104,11 +113,23 @@ public class Checkpoint extends MasterDaemon {
             storage = new Storage(imageDir);
             // get max image version
             imageVersion = storage.getLatestImageSeq();
+            long latestImageCreateTime = storage.getLatestImageCreateTime();
             // get max finalized journal id
             checkPointVersion = editLog.getFinalizedJournalId();
-            LOG.info("last checkpoint journal id: {}, current finalized journal id: {}",
-                    imageVersion, checkPointVersion);
-            if (imageVersion >= checkPointVersion) {
+            LOG.info("last checkpoint journal id: {}, create timestamp: {}. current finalized journal id: {}",
+                    imageVersion, latestImageCreateTime, checkPointVersion);
+            if (imageVersion < checkPointVersion) {
+                LOG.info("Trigger checkpoint since last checkpoint journal id: {} is less than "
+                        + "current finalized journal id: {}", imageVersion, checkPointVersion);
+            } else if (Config.isCloudMode() && Config.cloud_checkpoint_image_stale_threshold_seconds > 0
+                    && latestImageCreateTime > 0 && ((System.currentTimeMillis() - latestImageCreateTime)
+                    >= Config.cloud_checkpoint_image_stale_threshold_seconds * 1000L)) {
+                // No new finalized journals beyond the latest image.
+                // But in cloud mode, we may still want to force a checkpoint if the latest image file is expired.
+                // This helps that image can keep the newer table version, partition version, tablet stats.
+                LOG.info("Trigger checkpoint in cloud mode because latest image is expired. "
+                        + "latestImageSeq: {}, latestImageCreateTime: {}", imageVersion, latestImageCreateTime);
+            } else {
                 return;
             }
         } catch (Throwable e) {
@@ -146,6 +167,7 @@ public class Checkpoint extends MasterDaemon {
                                 checkPointVersion, env.getReplayedJournalId()));
             }
             env.postProcessAfterMetadataReplayed(false);
+            postProcessCloudMetadata();
             latestImageFilePath = env.saveImage();
             replayedJournalId = env.getReplayedJournalId();
 
@@ -395,4 +417,100 @@ public class Checkpoint extends MasterDaemon {
     public ReentrantReadWriteLock getLock() {
         return lock;
     }
+
+    private void postProcessCloudMetadata() {
+        if (Config.isNotCloudMode()) {
+            return;
+        }
+        Env servingEnv = Env.getServingEnv();
+        if (servingEnv == null) {
+            LOG.warn("serving env is null, skip process cloud metadata for checkpoint");
+            return;
+        }
+        long start = System.currentTimeMillis();
+        for (Database db : env.getInternalCatalog().getDbs()) {
+            Database servingDb = servingEnv.getInternalCatalog().getDbNullable(db.getId());
+            if (servingDb == null) {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("serving db is null. dbId: {}, dbName: {}", db.getId(), db.getFullName());
+                }
+                continue;
+            }
+
+            for (Table table : db.getTables()) {
+                Table servingTable = servingDb.getTableNullable(table.getId());
+                if (servingTable == null) {
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("serving table is null. dbId: {}, table: {}", db.getId(), table);
+                    }
+                    continue;
+                }
+                if (!(table instanceof OlapTable) || !(servingTable instanceof OlapTable)) {
+                    continue;
+                }
+                OlapTable olapTable = (OlapTable) table;
+                OlapTable servingOlapTable = (OlapTable) servingTable;
+
+                List<Partition> partitions = olapTable.getAllPartitions();
+                for (Partition partition : partitions) {
+                    Partition servingPartition = servingOlapTable.getPartition(partition.getId());
+                    if (servingPartition == null) {
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("serving partition is null. tableId: {}, partitionId: {}", table.getId(),
+                                    partition.getId());
+                        }
+                        continue;
+                    }
+                    // set tablet stats
+                    setTabletStats(table.getId(), partition, servingPartition);
+                }
+            }
+        }
+        LOG.info("post process cloud metadata for checkpoint finished. cost {} ms", System.currentTimeMillis() - start);
+    }
+
+    private void setTabletStats(long tableId, Partition partition, Partition servingPartition) {
+        for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
+            MaterializedIndex servingIndex = servingPartition.getIndex(index.getId());
+            if (servingIndex == null) {
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("serving index is null. tableId: {}, partitionId: {}, indexId: {}", tableId,
+                            partition.getId(), index.getId());
+                }
+                continue;
+            }
+            for (Tablet tablet : index.getTablets()) {
+                Tablet servingTablet = servingIndex.getTablet(tablet.getId());
+                if (servingTablet == null) {
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("serving tablet is null. tableId: {}, partitionId: {}, indexId: {}, tabletId: {}",
+                                tableId, partition.getId(), index.getId(), tablet.getId());
+                    }
+                    continue;
+                }
+                for (Replica replica : tablet.getReplicas()) {
+                    Replica servingReplica = servingTablet.getReplicaById(replica.getId());
+                    if (servingReplica == null) {
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("serving replica is null. tableId: {}, partitionId: {}, indexId: {}, "
+                                            + "tabletId: {}, replicaId: {}", tableId, partition.getId(), index.getId(),
+                                    tablet.getId(), replica.getId());
+                        }
+                        continue;
+                    }
+                    replica.setDataSize(servingReplica.getDataSize());
+                    replica.setRowsetCount(servingReplica.getRowsetCount());
+                    replica.setSegmentCount(servingReplica.getSegmentCount());
+                    replica.setRowCount(servingReplica.getRowCount());
+                    replica.setLocalInvertedIndexSize(servingReplica.getLocalInvertedIndexSize());
+                    replica.setLocalSegmentSize(servingReplica.getLocalSegmentSize());
+                    // set last get stats time and stats interval index
+                    CloudReplica cloudReplica = (CloudReplica) replica;
+                    CloudReplica servingCloudReplica = (CloudReplica) servingReplica;
+                    cloudReplica.setStatsIntervalIndex(servingCloudReplica.getStatsIntervalIndex());
+                    cloudReplica.setLastGetTabletStatsTime(servingCloudReplica.getLastGetTabletStatsTime());
+                }
+            }
+        }
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/Storage.java b/fe/fe-core/src/main/java/org/apache/doris/persist/Storage.java
index 9f8cd558a57..e826b7e03c2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/Storage.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/Storage.java
@@ -68,6 +68,7 @@ public class Storage {
     private long editsSeq;
     private long latestImageSeq = 0;
     private long latestValidatedImageSeq = 0;
+    private long latestImageCreateTime = 0;
     private String metaDir;
     private List<Long> editsFileSequenceNumbers;
 
@@ -83,6 +84,15 @@ public class Storage {
         this.editsSeq = editsSeq;
         this.latestImageSeq = latestImageSeq;
         this.metaDir = metaDir;
+        // try to set latestImageCreateTime from the image file if it exists
+        try {
+            File img = getImageFile(latestImageSeq);
+            if (img != null && img.exists()) {
+                latestImageCreateTime = img.lastModified();
+            }
+        } catch (Exception e) {
+            // ignore; best-effort only
+        }
     }
 
     public Storage(String metaDir) throws IOException {
@@ -146,6 +156,7 @@ public class Storage {
                         imageIds.add(fileSeq);
                         if (latestImageSeq < fileSeq) {
                             latestImageSeq = fileSeq;
+                            latestImageCreateTime = child.lastModified();
                         }
                     } else if (name.startsWith(EDITS)) {
                         // Just record the sequence part of the file name
@@ -162,6 +173,14 @@ public class Storage {
         latestValidatedImageSeq = imageIds.size() < 2 ? 0 : imageIds.get(imageIds.size() - 2);
     }
 
+    /**
+     * Return latest image creation time in milliseconds since epoch.
+     * 0 means unknown.
+     */
+    public long getLatestImageCreateTime() {
+        return latestImageCreateTime;
+    }
+
     public int getClusterID() {
         return clusterID;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to