This is an automated email from the ASF dual-hosted git repository.

tingchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 22c4e446c69 Allow skip CRC check during segment load when server 
starting. (#16432)
22c4e446c69 is described below

commit 22c4e446c69c1071a0ab531751ed323a7c576675
Author: Ting Chen <tingc...@uber.com>
AuthorDate: Tue Sep 2 10:21:09 2025 -0700

    Allow skip CRC check during segment load when server starting. (#16432)
    
    * Allow skip CRC check during segment load when server starting.
    
    * Fix unit tests.
    
    * Fix based on the comments.
    
    * Revise based on comments
---
 .../core/data/manager/BaseTableDataManager.java    | 28 +++++++++++++++-------
 .../data/manager/BaseTableDataManagerTest.java     |  2 ++
 .../helix/HelixInstanceDataManagerConfig.java      |  8 +++++++
 .../config/instance/InstanceDataManagerConfig.java |  2 ++
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
index e4db4f0b50a..0abd9bda1be 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
@@ -425,6 +425,12 @@ public abstract class BaseTableDataManager implements 
TableDataManager {
       _logger.info("Segment: {} has CRC: {} same as before, not replacing it", 
segmentName, localMetadata.getCrc());
       return;
     }
+    if (!_instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad()) {
+      _logger.info("Skipping replacing segment: {} even though its CRC has 
changed from: {} to: {} because "
+          + "instance.check.crc.on.segment.load is set to false", segmentName, 
localMetadata.getCrc(),
+          zkMetadata.getCrc());
+      return;
+    }
     _logger.info("Replacing segment: {} because its CRC has changed from: {} 
to: {}", segmentName,
         localMetadata.getCrc(), zkMetadata.getCrc());
     downloadAndLoadSegment(zkMetadata, indexLoadingConfig);
@@ -806,7 +812,8 @@ public abstract class BaseTableDataManager implements 
TableDataManager {
       - Continue loading the segment from the index directory.
       */
       boolean shouldDownload =
-          forceDownload || (isSegmentStatusCompleted(zkMetadata) && 
!hasSameCRC(zkMetadata, localMetadata));
+          forceDownload || (isSegmentStatusCompleted(zkMetadata) && 
!hasSameCRC(zkMetadata, localMetadata)
+              && _instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad());
       if (shouldDownload) {
         // Create backup directory to handle failure of segment reloading.
         createBackup(indexDir);
@@ -1205,20 +1212,25 @@ public abstract class BaseTableDataManager implements 
TableDataManager {
        - The "DONE" status confirms the COMMIT_END_METADATA call succeeded,
          and the segment is available either in deep storage or with a peer
          before discarding the local copy.
+       The only exception is if the server does not check CRC on segment load.
 
     Then:
     We need to fall back to downloading the segment from deep storage to load 
it.
     */
-    if (segmentMetadata == null || (isSegmentStatusCompleted(zkMetadata) && 
!hasSameCRC(zkMetadata, segmentMetadata))) {
-      if (segmentMetadata == null) {
-        _logger.info("Segment: {} does not exist", segmentName);
-      } else if (!hasSameCRC(zkMetadata, segmentMetadata)) {
-        _logger.info("Segment: {} has CRC changed from: {} to: {}", 
segmentName, segmentMetadata.getCrc(),
-            zkMetadata.getCrc());
-      }
+    if (segmentMetadata == null) {
+      _logger.info("Segment: {} does not exist", segmentName);
       closeSegmentDirectoryQuietly(segmentDirectory);
       return false;
     }
+    if (isSegmentStatusCompleted(zkMetadata) && !hasSameCRC(zkMetadata, 
segmentMetadata)) {
+      _logger.warn("Segment: {} has CRC changed from: {} to: {}", segmentName, 
segmentMetadata.getCrc(),
+          zkMetadata.getCrc());
+      if (_instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad()) {
+        closeSegmentDirectoryQuietly(segmentDirectory);
+        return false;
+      }
+      _logger.info("Skipping CRC check for segment: {} as configured. Proceed 
to load segment.", segmentName);
+    }
 
     try {
       // If the segment is still kept by the server, then we can
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
index 6224fd05b15..2064d393129 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
@@ -675,6 +675,8 @@ public class BaseTableDataManagerTest {
   private static InstanceDataManagerConfig 
createDefaultInstanceDataManagerConfig() {
     InstanceDataManagerConfig config = mock(InstanceDataManagerConfig.class);
     when(config.getInstanceDataDir()).thenReturn(TEMP_DIR.getAbsolutePath());
+    // Check CRC matching on segment load time.
+    when(config.shouldCheckCRCOnSegmentLoad()).thenReturn(true);
     return config;
   }
 
diff --git 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
index 0ae3c648e2f..cb24365d94d 100644
--- 
a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
+++ 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
@@ -119,6 +119,9 @@ public class HelixInstanceDataManagerConfig implements 
InstanceDataManagerConfig
   private static final int DEFAULT_DELETED_TABLES_CACHE_TTL_MINUTES = 60;
   private static final int DEFAULT_DELETED_SEGMENTS_CACHE_SIZE = 10_000;
   private static final int DEFAULT_DELETED_SEGMENTS_CACHE_TTL_MINUTES = 2;
+  // By default, check CRC matching when loading segments.
+  private static final boolean DEFAULT_CHECK_CRC_ON_SEGMENT_LOAD = true;
+  private static final String CHECK_CRC_ON_SEGMENT_LOAD = 
"check.crc.on.segment.load";
 
   private final PinotConfiguration _serverConfig;
   private final PinotConfiguration _upsertConfig;
@@ -331,4 +334,9 @@ public class HelixInstanceDataManagerConfig implements 
InstanceDataManagerConfig
   public boolean isUploadSegmentToDeepStore() {
     return _serverConfig.getProperty(UPLOAD_SEGMENT_TO_DEEP_STORE, 
DEFAULT_UPLOAD_SEGMENT_TO_DEEP_STORE);
   }
+
+  @Override
+  public boolean shouldCheckCRCOnSegmentLoad() {
+    return _serverConfig.getProperty(CHECK_CRC_ON_SEGMENT_LOAD, 
DEFAULT_CHECK_CRC_ON_SEGMENT_LOAD);
+  }
 }
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
 
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
index 579e390bc0f..fcd7c6b61de 100644
--- 
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
+++ 
b/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java
@@ -86,4 +86,6 @@ public interface InstanceDataManagerConfig {
   Map<String, Map<String, String>> getTierConfigs();
 
   boolean isUploadSegmentToDeepStore();
+
+  boolean shouldCheckCRCOnSegmentLoad();
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to