This is an automated email from the ASF dual-hosted git repository. tingchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 22c4e446c69 Allow skip CRC check during segment load when server starting. (#16432) 22c4e446c69 is described below commit 22c4e446c69c1071a0ab531751ed323a7c576675 Author: Ting Chen <tingc...@uber.com> AuthorDate: Tue Sep 2 10:21:09 2025 -0700 Allow skip CRC check during segment load when server starting. (#16432) * Allow skip CRC check during segment load when server starting. * Fix unit tests. * Fix based on the comments. * Revise based on comments --- .../core/data/manager/BaseTableDataManager.java | 28 +++++++++++++++------- .../data/manager/BaseTableDataManagerTest.java | 2 ++ .../helix/HelixInstanceDataManagerConfig.java | 8 +++++++ .../config/instance/InstanceDataManagerConfig.java | 2 ++ 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java index e4db4f0b50a..0abd9bda1be 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java @@ -425,6 +425,12 @@ public abstract class BaseTableDataManager implements TableDataManager { _logger.info("Segment: {} has CRC: {} same as before, not replacing it", segmentName, localMetadata.getCrc()); return; } + if (!_instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad()) { + _logger.info("Skipping replacing segment: {} even though its CRC has changed from: {} to: {} because " + + "instance.check.crc.on.segment.load is set to false", segmentName, localMetadata.getCrc(), + zkMetadata.getCrc()); + return; + } _logger.info("Replacing segment: {} because its CRC has changed from: {} to: {}", segmentName, localMetadata.getCrc(), zkMetadata.getCrc()); downloadAndLoadSegment(zkMetadata, indexLoadingConfig); @@ -806,7 +812,8 @@ public abstract class BaseTableDataManager implements TableDataManager { - Continue loading the segment from the index directory. */ boolean shouldDownload = - forceDownload || (isSegmentStatusCompleted(zkMetadata) && !hasSameCRC(zkMetadata, localMetadata)); + forceDownload || (isSegmentStatusCompleted(zkMetadata) && !hasSameCRC(zkMetadata, localMetadata) + && _instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad()); if (shouldDownload) { // Create backup directory to handle failure of segment reloading. createBackup(indexDir); @@ -1205,20 +1212,25 @@ public abstract class BaseTableDataManager implements TableDataManager { - The "DONE" status confirms the COMMIT_END_METADATA call succeeded, and the segment is available either in deep storage or with a peer before discarding the local copy. + The only exception is if the server does not check CRC on segment load. Then: We need to fall back to downloading the segment from deep storage to load it. */ - if (segmentMetadata == null || (isSegmentStatusCompleted(zkMetadata) && !hasSameCRC(zkMetadata, segmentMetadata))) { - if (segmentMetadata == null) { - _logger.info("Segment: {} does not exist", segmentName); - } else if (!hasSameCRC(zkMetadata, segmentMetadata)) { - _logger.info("Segment: {} has CRC changed from: {} to: {}", segmentName, segmentMetadata.getCrc(), - zkMetadata.getCrc()); - } + if (segmentMetadata == null) { + _logger.info("Segment: {} does not exist", segmentName); closeSegmentDirectoryQuietly(segmentDirectory); return false; } + if (isSegmentStatusCompleted(zkMetadata) && !hasSameCRC(zkMetadata, segmentMetadata)) { + _logger.warn("Segment: {} has CRC changed from: {} to: {}", segmentName, segmentMetadata.getCrc(), + zkMetadata.getCrc()); + if (_instanceDataManagerConfig.shouldCheckCRCOnSegmentLoad()) { + closeSegmentDirectoryQuietly(segmentDirectory); + return false; + } + _logger.info("Skipping CRC check for segment: {} as configured. Proceed to load segment.", segmentName); + } try { // If the segment is still kept by the server, then we can diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java index 6224fd05b15..2064d393129 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java @@ -675,6 +675,8 @@ public class BaseTableDataManagerTest { private static InstanceDataManagerConfig createDefaultInstanceDataManagerConfig() { InstanceDataManagerConfig config = mock(InstanceDataManagerConfig.class); when(config.getInstanceDataDir()).thenReturn(TEMP_DIR.getAbsolutePath()); + // Check CRC matching on segment load time. + when(config.shouldCheckCRCOnSegmentLoad()).thenReturn(true); return config; } diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java index 0ae3c648e2f..cb24365d94d 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java @@ -119,6 +119,9 @@ public class HelixInstanceDataManagerConfig implements InstanceDataManagerConfig private static final int DEFAULT_DELETED_TABLES_CACHE_TTL_MINUTES = 60; private static final int DEFAULT_DELETED_SEGMENTS_CACHE_SIZE = 10_000; private static final int DEFAULT_DELETED_SEGMENTS_CACHE_TTL_MINUTES = 2; + // By default, check CRC matching when loading segments. + private static final boolean DEFAULT_CHECK_CRC_ON_SEGMENT_LOAD = true; + private static final String CHECK_CRC_ON_SEGMENT_LOAD = "check.crc.on.segment.load"; private final PinotConfiguration _serverConfig; private final PinotConfiguration _upsertConfig; @@ -331,4 +334,9 @@ public class HelixInstanceDataManagerConfig implements InstanceDataManagerConfig public boolean isUploadSegmentToDeepStore() { return _serverConfig.getProperty(UPLOAD_SEGMENT_TO_DEEP_STORE, DEFAULT_UPLOAD_SEGMENT_TO_DEEP_STORE); } + + @Override + public boolean shouldCheckCRCOnSegmentLoad() { + return _serverConfig.getProperty(CHECK_CRC_ON_SEGMENT_LOAD, DEFAULT_CHECK_CRC_ON_SEGMENT_LOAD); + } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java index 579e390bc0f..fcd7c6b61de 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/instance/InstanceDataManagerConfig.java @@ -86,4 +86,6 @@ public interface InstanceDataManagerConfig { Map<String, Map<String, String>> getTierConfigs(); boolean isUploadSegmentToDeepStore(); + + boolean shouldCheckCRCOnSegmentLoad(); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org