This is an automated email from the ASF dual-hosted git repository.
smengcl pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new d01b3aefc77 HDDS-15314. Disable defrag DB metrics due to crash during
snapshot defrag (#10301)
d01b3aefc77 is described below
commit d01b3aefc77e6f7216041b28bbb76827ffef6644
Author: Siyao Meng <[email protected]>
AuthorDate: Thu May 21 02:52:52 2026 -0700
HDDS-15314. Disable defrag DB metrics due to crash during snapshot defrag
(#10301)
---
.../hadoop/ozone/om/OmMetadataManagerImpl.java | 29 +++++++++++++-
.../om/snapshot/defrag/SnapshotDefragService.java | 16 ++++++--
.../snapshot/defrag/TestSnapshotDefragService.java | 44 ++++++++++++++++++++++
3 files changed, 84 insertions(+), 5 deletions(-)
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
index 1797acefa28..b76e5aa5262 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java
@@ -242,6 +242,12 @@ public static OmMetadataManagerImpl
createCheckpointMetadataManager(
public static OmMetadataManagerImpl createCheckpointMetadataManager(
OzoneConfiguration conf, DBCheckpoint checkpoint, boolean readOnly)
throws IOException {
+ return createCheckpointMetadataManager(conf, checkpoint, readOnly, true);
+ }
+
+ public static OmMetadataManagerImpl createCheckpointMetadataManager(
+ OzoneConfiguration conf, DBCheckpoint checkpoint, boolean readOnly,
+ boolean enableRocksDbMetrics) throws IOException {
Path path = checkpoint.getCheckpointLocation();
Path parent = path.getParent();
if (parent == null) {
@@ -254,7 +260,8 @@ public static OmMetadataManagerImpl
createCheckpointMetadataManager(
throw new IllegalStateException("DB checkpoint dir name should not "
+ "have been null. Checkpoint path is " + path);
}
- return new OmMetadataManagerImpl(conf, dir, name.toString(), readOnly);
+ return new OmMetadataManagerImpl(
+ conf, dir, name.toString(), readOnly, enableRocksDbMetrics);
}
protected OmMetadataManagerImpl(OzoneConfiguration conf, File dir, String
name) throws IOException {
@@ -271,6 +278,24 @@ protected OmMetadataManagerImpl(OzoneConfiguration conf,
File dir, String name)
*/
public OmMetadataManagerImpl(OzoneConfiguration conf, File dir, String name,
boolean readOnly)
throws IOException {
+ this(conf, dir, name, readOnly, true);
+ }
+
+ /**
+ * Metadata constructor for checkpoints.
+ *
+ * @param conf - Ozone conf.
+ * @param dir - Checkpoint parent directory.
+ * @param name - Checkpoint directory name.
+ * @param readOnly - Whether to open the checkpoint DB read-only.
+ * @param enableRocksDbMetrics - Whether to register generic RocksDB metrics.
+ * Pass false for transient checkpoint DBs whose column families may be
+ * dropped or recreated while the DB is open.
+ * @throws IOException
+ */
+ protected OmMetadataManagerImpl(OzoneConfiguration conf, File dir,
+ String name, boolean readOnly, boolean enableRocksDbMetrics)
+ throws IOException {
lock = new OmReadOnlyLock();
hierarchicalLockManager = new ReadOnlyHierarchicalResourceLockManager();
omEpoch = 0;
@@ -282,7 +307,7 @@ public OmMetadataManagerImpl(OzoneConfiguration conf, File
dir, String name, boo
.setMaxNumberOfOpenFiles(maxOpenFiles)
.setEnableCompactionDag(false, null)
.setCreateCheckpointDirs(false)
- .setEnableRocksDbMetrics(true)
+ .setEnableRocksDbMetrics(enableRocksDbMetrics)
.build();
initializeOmTables(CacheType.PARTIAL_CACHE, false);
perfMetrics = null;
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/defrag/SnapshotDefragService.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/defrag/SnapshotDefragService.java
index cd3f845dcbe..4f018ed4641 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/defrag/SnapshotDefragService.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/defrag/SnapshotDefragService.java
@@ -527,7 +527,7 @@ int atomicSwitchSnapshotDB(UUID snapshotId, Path
checkpointPath) throws IOExcept
RocksDBCheckpoint dbCheckpoint = new RocksDBCheckpoint(nextVersionPath);
// Add a new version to the local data file.
try (OmMetadataManagerImpl newVersionCheckpointMetadataManager =
- OmMetadataManagerImpl.createCheckpointMetadataManager(conf,
dbCheckpoint, true)) {
+ createDefragCheckpointMetadataManager(dbCheckpoint, true)) {
RDBStore newVersionCheckpointStore = (RDBStore)
newVersionCheckpointMetadataManager.getStore();
snapshotLocalDataProvider.addSnapshotVersion(newVersionCheckpointStore);
snapshotLocalDataProvider.commit();
@@ -549,6 +549,16 @@ public BackgroundTaskResult call() throws Exception {
}
}
+ @VisibleForTesting
+ OmMetadataManagerImpl createDefragCheckpointMetadataManager(
+ DBCheckpoint checkpoint, boolean readOnly) throws IOException {
+ // Defrag checkpoint DBs are transient and drop/recreate column families.
+ // Generic RocksDB metrics are not useful for them and can race with CF handle
+ // lifetime changes while the checkpoint is being rewritten.
+ return OmMetadataManagerImpl.createCheckpointMetadataManager(
+ conf, checkpoint, readOnly, false);
+ }
+
/**
* Creates a new checkpoint by modifying the metadata manager from a snapshot.
* This involves generating a temporary checkpoint and truncating specified
@@ -570,7 +580,7 @@ OmMetadataManagerImpl createCheckpoint(SnapshotInfo
snapshotInfo,
snapshotInfo.getVolumeName(), snapshotInfo.getBucketName(),
snapshotInfo.getName())) {
DBCheckpoint checkpoint =
snapshot.get().getMetadataManager().getStore().getCheckpoint(tmpDefragDir,
true);
try (OmMetadataManagerImpl metadataManagerBeforeTruncate =
- OmMetadataManagerImpl.createCheckpointMetadataManager(conf,
checkpoint, false)) {
+ createDefragCheckpointMetadataManager(checkpoint, false)) {
DBStore dbStore = metadataManagerBeforeTruncate.getStore();
for (String table : metadataManagerBeforeTruncate.listTableNames()) {
if (!incrementalColumnFamilies.contains(table)) {
@@ -581,7 +591,7 @@ OmMetadataManagerImpl createCheckpoint(SnapshotInfo
snapshotInfo,
throw new IOException("Failed to close checkpoint of snapshot: " +
snapshotInfo.getSnapshotId(), e);
}
// This will recreate the column families in the checkpoint.
- return OmMetadataManagerImpl.createCheckpointMetadataManager(conf,
checkpoint, false);
+ return createDefragCheckpointMetadataManager(checkpoint, false);
}
}
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/defrag/TestSnapshotDefragService.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/defrag/TestSnapshotDefragService.java
index 224de13840a..f93d95d6ac9 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/defrag/TestSnapshotDefragService.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/defrag/TestSnapshotDefragService.java
@@ -76,6 +76,7 @@
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.StringUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.utils.RocksDBStoreMetrics;
import org.apache.hadoop.hdds.utils.db.CodecBuffer;
import org.apache.hadoop.hdds.utils.db.CodecBufferCodec;
import org.apache.hadoop.hdds.utils.db.CodecException;
@@ -91,6 +92,7 @@
import org.apache.hadoop.hdds.utils.db.StringInMemoryTestTable;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.TablePrefixInfo;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.OMPerformanceMetrics;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
@@ -458,6 +460,20 @@ private static Stream<Arguments>
testCreateCheckpointCases() {
);
}
+ private static String rocksDBMetricsSourceName(Path dbLocation) {
+ return RocksDBStoreMetrics.ROCKSDB_CONTEXT_PREFIX +
dbLocation.getFileName();
+ }
+
+ private static void assertNoRocksDBMetrics(Path dbLocation) {
+ assertNull(DefaultMetricsSystem.instance().getSource(
+ rocksDBMetricsSourceName(dbLocation)));
+ }
+
+ private static void assertRocksDBMetricsRegistered(Path dbLocation) {
+ assertNotNull(DefaultMetricsSystem.instance().getSource(
+ rocksDBMetricsSourceName(dbLocation)));
+ }
+
private Map<String, Map<String, String>> createTableContents(Path path,
String keyPrefix) throws IOException {
DBCheckpoint snapshotCheckpointLocation = new RocksDBCheckpoint(path);
Map<String, Map<String, String>> tableContents = new HashMap<>();
@@ -525,7 +541,35 @@ public void close() {
.filter(e -> !incrementalTables.contains(e.getKey()))
.forEach(e -> e.getValue().clear());
assertContents(tableContents, result.getStore());
+ assertNoRocksDBMetrics(result.getStore().getDbLocation().toPath());
+ }
+ }
+
+ @Test
+ public void testDefragCheckpointMetadataManagerSkipsRocksDBMetrics() throws
Exception {
+ Path checkpointPath = tempDir.resolve("defrag-metrics-" +
UUID.randomUUID());
+ createTableContents(checkpointPath, "_metrics_");
+
+ assertNoRocksDBMetrics(checkpointPath);
+ // The generic checkpoint path should keep the existing behavior and
+ // register RocksDB metrics.
+ try (OmMetadataManagerImpl defaultCheckpointMetadataManager =
+ OmMetadataManagerImpl.createCheckpointMetadataManager(
+ configuration, new RocksDBCheckpoint(checkpointPath), false))
{
+ assertRocksDBMetricsRegistered(
+
defaultCheckpointMetadataManager.getStore().getDbLocation().toPath());
+ }
+ assertNoRocksDBMetrics(checkpointPath);
+
+ // Defrag checkpoint DBs are transient and must not register generic
+ // RocksDB metrics.
+ try (OmMetadataManagerImpl defragCheckpointMetadataManager =
+ defragService.createDefragCheckpointMetadataManager(
+ new RocksDBCheckpoint(checkpointPath), false)) {
+ assertNoRocksDBMetrics(
+ defragCheckpointMetadataManager.getStore().getDbLocation().toPath());
}
+ assertNoRocksDBMetrics(checkpointPath);
}
private void assertContents(Map<String, Map<String, String>> contents, Path
path) throws IOException {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]