This is an automated email from the ASF dual-hosted git repository.
siyao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 57d8a4144a HDDS-8832. [Snapshot] Synchronized add snapshotInfo to
snapshot chain and snapshot cache (#4909)
57d8a4144a is described below
commit 57d8a4144a2668e104f9ffeebb9e80134a3930da
Author: Hemant Kumar <[email protected]>
AuthorDate: Thu Jun 15 22:57:54 2023 -0700
HDDS-8832. [Snapshot] Synchronized add snapshotInfo to snapshot chain and
snapshot cache (#4909)
---
.../hadoop/ozone/om/SnapshotChainManager.java | 30 +++---
.../request/snapshot/OMSnapshotCreateRequest.java | 106 ++++++++++++++-------
.../apache/hadoop/ozone/om/TestSnapshotChain.java | 2 +-
3 files changed, 81 insertions(+), 57 deletions(-)
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java
index 0e343631f6..c147c6fdc3 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/SnapshotChainManager.java
@@ -16,7 +16,6 @@
*/
package org.apache.hadoop.ozone.om;
-import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
@@ -56,8 +55,7 @@ public class SnapshotChainManager {
private final ConcurrentMap<UUID, String> snapshotIdToTableKey;
private UUID latestGlobalSnapshotId;
- public SnapshotChainManager(OMMetadataManager metadataManager)
- throws IOException {
+ public SnapshotChainManager(OMMetadataManager metadataManager) {
globalSnapshotChain = Collections.synchronizedMap(new LinkedHashMap<>());
snapshotChainByPath = new ConcurrentHashMap<>();
latestSnapshotIdByPath = new ConcurrentHashMap<>();
@@ -230,8 +228,7 @@ public class SnapshotChainManager {
/**
* Loads the snapshot chain from SnapshotInfo table.
*/
- private void loadFromSnapshotInfoTable(OMMetadataManager metadataManager)
- throws IOException {
+ private void loadFromSnapshotInfoTable(OMMetadataManager metadataManager) {
// read from snapshotInfo table to populate
// snapshot chains - both global and local path
try (TableIterator<String, ? extends Table.KeyValue<String, SnapshotInfo>>
@@ -250,17 +247,22 @@ public class SnapshotChainManager {
for (SnapshotInfo snapshotInfo : snaps.values()) {
addSnapshot(snapshotInfo);
}
+ } catch (IOException ioException) {
+ // TODO: [SNAPSHOT] Fail gracefully.
+ throw new RuntimeException(ioException);
}
}
/**
* Add snapshot to snapshot chain.
*/
- public void addSnapshot(SnapshotInfo snapshotInfo) throws IOException {
+ public synchronized void addSnapshot(SnapshotInfo snapshotInfo)
+ throws IOException {
addSnapshotGlobal(snapshotInfo.getSnapshotId(),
snapshotInfo.getGlobalPreviousSnapshotId());
addSnapshotPath(snapshotInfo.getSnapshotPath(),
- snapshotInfo.getSnapshotId(),
snapshotInfo.getPathPreviousSnapshotId());
+ snapshotInfo.getSnapshotId(),
+ snapshotInfo.getPathPreviousSnapshotId());
// store snapshot ID to snapshot DB table key in the map
snapshotIdToTableKey.put(snapshotInfo.getSnapshotId(),
snapshotInfo.getTableKey());
@@ -269,7 +271,8 @@ public class SnapshotChainManager {
/**
* Delete snapshot from snapshot chain.
*/
- public boolean deleteSnapshot(SnapshotInfo snapshotInfo) throws IOException {
+ public synchronized boolean deleteSnapshot(SnapshotInfo snapshotInfo)
+ throws IOException {
boolean status = deleteSnapshotGlobal(snapshotInfo.getSnapshotId()) &&
deleteSnapshotPath(snapshotInfo.getSnapshotPath(),
snapshotInfo.getSnapshotId());
@@ -422,15 +425,4 @@ public class SnapshotChainManager {
public String getTableKey(UUID snapshotId) {
return snapshotIdToTableKey.get(snapshotId);
}
-
- @VisibleForTesting
- void loadSnapshotInfo(OMMetadataManager metadataManager) throws IOException {
- loadFromSnapshotInfoTable(metadataManager);
- }
-
- @VisibleForTesting
- public LinkedHashMap<UUID, SnapshotChainInfo> getSnapshotChainPath(
- String path) {
- return snapshotChainByPath.get(path);
- }
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
index 5903edb520..ebe1dbc80a 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotCreateRequest.java
@@ -50,7 +50,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.Objects;
import java.util.UUID;
import static org.apache.hadoop.hdds.HddsUtils.fromProtobuf;
@@ -129,8 +128,6 @@ public class OMSnapshotCreateRequest extends
OMClientRequest {
IOException exception = null;
OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl)
ozoneManager.getMetadataManager();
- SnapshotChainManager snapshotChainManager =
- omMetadataManager.getSnapshotChainManager();
OMResponse.Builder omResponse = OmResponseUtil.getOMResponseBuilder(
getOmRequest());
@@ -157,26 +154,14 @@ public class OMSnapshotCreateRequest extends
OMClientRequest {
}
// Note down RDB latest transaction sequence number, which is used
- // as snapshot generation in the differ.
+ // as snapshot generation in the Differ.
final long dbLatestSequenceNumber =
((RDBStore) omMetadataManager.getStore()).getDb()
.getLatestSequenceNumber();
snapshotInfo.setDbTxSequenceNumber(dbLatestSequenceNumber);
- // Set previous path and global snapshot
- UUID latestPathSnapshot =
- snapshotChainManager.getLatestPathSnapshotId(snapshotPath);
- UUID latestGlobalSnapshot =
- snapshotChainManager.getLatestGlobalSnapshotId();
-
- snapshotInfo.setPathPreviousSnapshotId(latestPathSnapshot);
- snapshotInfo.setGlobalPreviousSnapshotId(latestGlobalSnapshot);
-
- snapshotChainManager.addSnapshot(snapshotInfo);
-
- omMetadataManager.getSnapshotInfoTable()
- .addCacheEntry(new CacheKey<>(key),
- CacheValue.get(transactionLogIndex, snapshotInfo));
+ addSnapshotInfoToSnapshotChainAndCache(omMetadataManager,
+ transactionLogIndex);
omResponse.setCreateSnapshotResponse(
CreateSnapshotResponse.newBuilder()
@@ -184,21 +169,6 @@ public class OMSnapshotCreateRequest extends
OMClientRequest {
omClientResponse = new OMSnapshotCreateResponse(
omResponse.build(), snapshotInfo);
} catch (IOException ex) {
- // Remove snapshot from the SnapshotChainManager in case of any failure.
- // It is possible that createSnapshot request fails after snapshot gets
- // added to snapshot chain manager because couldn't add it to cache/DB.
- // In that scenario, SnapshotChainManager#globalSnapshotId will point to
- // failed createSnapshot request's snapshotId but in actual it doesn't
- // exist in the SnapshotInfo table.
- // If it doesn't get removed, OM restart will crash on
- // SnapshotChainManager#loadFromSnapshotInfoTable because it could not
- // find the previous snapshot which doesn't exist because it was never
- // added to the SnapshotInfo table.
- if (Objects.equals(snapshotInfo.getSnapshotId(),
- snapshotChainManager.getLatestGlobalSnapshotId())) {
- removeSnapshotInfoFromSnapshotChainManager(snapshotChainManager,
- snapshotInfo);
- }
exception = ex;
omClientResponse = new OMSnapshotCreateResponse(
createErrorOMResponse(omResponse, exception));
@@ -220,17 +190,79 @@ public class OMSnapshotCreateRequest extends
OMClientRequest {
snapshotInfo.toAuditMap(), exception, userInfo));
if (exception == null) {
- LOG.info("Created snapshot '{}' under path '{}'",
- snapshotName, snapshotPath);
+ LOG.info("Created snapshot: '{}' with snapshotId: '{}' under path '{}'",
+ snapshotName, snapshotInfo.getSnapshotId(), snapshotPath);
omMetrics.incNumSnapshotActive();
} else {
omMetrics.incNumSnapshotCreateFails();
- LOG.error("Failed to create snapshot '{}' under path '{}'",
- snapshotName, snapshotPath);
+ LOG.error("Failed to create snapshot '{}' with snapshotId: '{}' under " +
+ "path '{}'",
+ snapshotName, snapshotInfo.getSnapshotId(), snapshotPath);
}
return omClientResponse;
}
+ /**
+ * Add snapshot to snapshot chain and snapshot cache as a single atomic
+ * operation.
+ * If it is not done as a single atomic operation, can cause data corruption
+ * if there is any failure in adding snapshot entry to cache.
+ * For example, when two threads create snapshots on different
+ * buckets.
+ * T-1: Thread-1 adds the snapshot-1 to chain first. Previous snapshot for
+ * snapshot-1 would be null (let's assume first snapshot)
+ * T-2: Thread-2 adds snapshot-2, previous would be snapshot-1
+ * T-3: Thread-1 tries to update cache with snapshot-1 and fails.
+ * T-4: Thread-2 tries to update cache with snapshot-2 and succeeds.
+ * T-5: Thread-1 removes the snapshot from chain because it failed to
+ * update the cache.
+ * Now, snapshot-2's previous is snapshot-1 but, it doesn't exist because
+ * it was removed at T-5.
+ */
+ private void addSnapshotInfoToSnapshotChainAndCache(
+ OmMetadataManagerImpl omMetadataManager,
+ long transactionLogIndex
+ ) throws IOException {
+ // It is synchronized on SnapshotChainManager object so that this block is
+ // synchronized with OMSnapshotPurgeResponse#cleanupSnapshotChain and only
+ // one of these two operation gets executed at a time otherwise we could be
+ // in similar situation explained above if snapshot gets deleted.
+ synchronized (omMetadataManager.getSnapshotChainManager()) {
+ SnapshotChainManager snapshotChainManager =
+ omMetadataManager.getSnapshotChainManager();
+
+ try {
+ UUID latestPathSnapshot =
+ snapshotChainManager.getLatestPathSnapshotId(snapshotPath);
+ UUID latestGlobalSnapshot =
+ snapshotChainManager.getLatestGlobalSnapshotId();
+
+ snapshotInfo.setPathPreviousSnapshotId(latestPathSnapshot);
+ snapshotInfo.setGlobalPreviousSnapshotId(latestGlobalSnapshot);
+
+ snapshotChainManager.addSnapshot(snapshotInfo);
+
+ omMetadataManager.getSnapshotInfoTable()
+ .addCacheEntry(new CacheKey<>(snapshotInfo.getTableKey()),
+ CacheValue.get(transactionLogIndex, snapshotInfo));
+ } catch (IOException ioException) {
+ // Remove snapshot from the SnapshotChainManager in case of any
failure.
+ // It is possible that createSnapshot request fails after snapshot gets
+ // added to snapshot chain manager because couldn't add it to cache/DB.
+ // In that scenario, SnapshotChainManager#globalSnapshotId will point
to
+ // failed createSnapshot request's snapshotId but in actual it doesn't
+ // exist in the SnapshotInfo table.
+ // If it doesn't get removed, OM restart will crash on
+ // SnapshotChainManager#loadFromSnapshotInfoTable because it could not
+ // find the previous snapshot which doesn't exist because it was never
+ // added to the SnapshotInfo table.
+ removeSnapshotInfoFromSnapshotChainManager(snapshotChainManager,
+ snapshotInfo);
+ throw ioException;
+ }
+ }
+ }
+
/**
* Removes the snapshot from the SnapshotChainManager.
* In case of any failure, it logs the exception as an error and swallow it.
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotChain.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotChain.java
index 4a63eb6887..6613ffeb7e 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotChain.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestSnapshotChain.java
@@ -261,7 +261,7 @@ public class TestSnapshotChain {
prevSnapshotID = snapshotID;
}
- chainManager.loadSnapshotInfo(omMetadataManager);
+ chainManager = new SnapshotChainManager(omMetadataManager);
// check if snapshots loaded correctly from snapshotInfoTable
assertEquals(snapshotID2, chainManager.getLatestGlobalSnapshotId());
assertEquals(snapshotID2, chainManager.nextGlobalSnapshot(snapshotID1));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]