mukul1987 commented on a change in pull request #2165:
URL: https://github.com/apache/hadoop/pull/2165#discussion_r461799999
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -382,6 +395,14 @@ public void deleteSnapshot(final INodesInPath iip, final String snapshotName,
EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
Review comment:
What will be the effect of deleting the snapshot twice?
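For example, a minimal guard could make the double-delete behavior explicit. This is only a sketch: isMarkedAsDeleted, snapshot and snapshotName are from this patch, while the exception type and message are just illustrative.

    // before setting the deletion xattr in SnapshotManager#deleteSnapshot
    if (snapshot.getRoot().isMarkedAsDeleted()) {
      throw new SnapshotException("Snapshot " + snapshotName
          + " has already been marked as deleted.");
    }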
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
##########
@@ -7166,6 +7175,29 @@ void deleteSnapshot(String snapshotRoot, String snapshotName,
logAuditEvent(true, operationName, rootPath, null, null);
}
+ public void gcDeletedSnapshot(String snapshotRoot, String snapshotName)
+ throws IOException {
+ final String operationName = "gcDeletedSnapshot";
+ String rootPath = null;
+ final INode.BlocksMapUpdateInfo blocksToBeDeleted;
+
+ checkOperation(OperationCategory.WRITE);
+ writeLock();
+ try {
+ checkOperation(OperationCategory.WRITE);
+ rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
+ checkNameNodeSafeMode("Cannot gcDeletedSnapshot for " + rootPath);
+
+ final long now = Time.now();
Review comment:
Let's assert that the xattr is set on the snapshot and that this is the first
snapshot to be deleted.
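Something along these lines inside FSNamesystem#gcDeletedSnapshot, reusing identifiers from this patch; assertFirstSnapshot is only a hypothetical helper name for the second check.

    final INodesInPath iip = dir.resolvePath(null, snapshotRoot, DirOp.WRITE);
    // verify the deletion xattr was set by a prior user-initiated delete
    snapshotManager.assertMarkedAsDeleted(iip, snapshotName);
    // verify this is the earliest remaining snapshot (hypothetical helper)
    snapshotManager.assertFirstSnapshot(iip, snapshotName);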
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -382,6 +395,14 @@ public void deleteSnapshot(final INodesInPath iip, final String snapshotName,
EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
return;
}
+
+ // assert if it is deleting the first snapshot
+ final INodeDirectoryAttributes first = snapshottable.getDiffs().getFirstSnapshotINode();
+ if (snapshot.getRoot() != first) {
+ throw new IllegalStateException("Failed to delete snapshot " + snapshotName
Review comment:
Should this assertion also be inside gcDeletedSnapshot?
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDeletionGc.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS;
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT;
+
+public class SnapshotDeletionGc {
+ public static final Logger LOG = LoggerFactory.getLogger(
+ SnapshotDeletionGc.class);
+
+ private final FSNamesystem namesystem;
+ private final long deletionOrderedGcPeriodMs;
+ private final AtomicReference<Timer> timer = new AtomicReference<>();
+
+ public SnapshotDeletionGc(FSNamesystem namesystem, Configuration conf) {
+ this.namesystem = namesystem;
+
+ this.deletionOrderedGcPeriodMs = conf.getLong(
+ DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+ DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT);
+ LOG.info("{} = {}", DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+ deletionOrderedGcPeriodMs);
+ }
+
+ public void schedule() {
+ if (timer.get() != null) {
+ return;
+ }
+ final Timer t = new Timer(getClass().getSimpleName(), true);
+ if (timer.compareAndSet(null, t)) {
+ LOG.info("Schedule at fixed rate {}",
+ StringUtils.formatTime(deletionOrderedGcPeriodMs));
+ t.scheduleAtFixedRate(new GcTask(),
+ deletionOrderedGcPeriodMs, deletionOrderedGcPeriodMs);
+ }
+ }
+
+ public void cancel() {
+ final Timer t = timer.getAndSet(null);
+ if (t != null) {
+ LOG.info("cancel");
+ t.cancel();
+ }
+ }
+
+ private void gcDeletedSnapshot(String name) {
+ final Snapshot.Root deleted;
+ namesystem.readLock();
+ try {
+ deleted = namesystem.getSnapshotManager().chooseDeletedSnapshot();
+ } finally {
Review comment:
Let's catch Throwable here and log an error in case we get an exception.
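For example:

    final Snapshot.Root deleted;
    namesystem.readLock();
    try {
      deleted = namesystem.getSnapshotManager().chooseDeletedSnapshot();
    } catch (Throwable t) {
      // log and rethrow so a bug here is not silently swallowed by the Timer thread
      LOG.error("Failed to chooseDeletedSnapshot", t);
      throw t;
    } finally {
      namesystem.readUnlock();
    }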
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -382,6 +395,14 @@ public void deleteSnapshot(final INodesInPath iip, final String snapshotName,
EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
return;
}
+
Review comment:
I feel all of the deletion should happen via one code path, i.e. the
background thread. Deleting some snapshots from the user thread and others
from the background thread is error prone.
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDeletionGc.java
##########
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS;
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT;
+
+public class SnapshotDeletionGc {
+ public static final Logger LOG = LoggerFactory.getLogger(
+ SnapshotDeletionGc.class);
+
+ private final FSNamesystem namesystem;
+ private final long deletionOrderedGcPeriodMs;
+ private final AtomicReference<Timer> timer = new AtomicReference<>();
+
+ public SnapshotDeletionGc(FSNamesystem namesystem, Configuration conf) {
+ this.namesystem = namesystem;
+
+ this.deletionOrderedGcPeriodMs = conf.getLong(
+ DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+ DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT);
+ LOG.info("{} = {}", DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+ deletionOrderedGcPeriodMs);
+ }
+
+ public void schedule() {
+ if (timer.get() != null) {
+ return;
+ }
+ final Timer t = new Timer(getClass().getSimpleName(), true);
+ if (timer.compareAndSet(null, t)) {
+ LOG.info("Schedule at fixed rate {}",
+ StringUtils.formatTime(deletionOrderedGcPeriodMs));
+ t.scheduleAtFixedRate(new GcTask(),
+ deletionOrderedGcPeriodMs, deletionOrderedGcPeriodMs);
+ }
+ }
+
+ public void cancel() {
+ final Timer t = timer.getAndSet(null);
+ if (t != null) {
+ LOG.info("cancel");
+ t.cancel();
+ }
+ }
+
+ private void gcDeletedSnapshot(String name) {
+ final Snapshot.Root deleted;
+ namesystem.readLock();
+ try {
+ deleted = namesystem.getSnapshotManager().chooseDeletedSnapshot();
+ } finally {
+ namesystem.readUnlock();
+ }
+ if (deleted == null) {
+ LOG.trace("{}: no snapshots are marked as deleted.", name);
+ return;
+ }
+
+ final String snapshotRoot = deleted.getRootFullPathName();
+ final String snapshotName = deleted.getLocalName();
+ LOG.info("{}: delete snapshot {} from {}",
+ name, snapshotName, snapshotRoot);
+
+ try {
+ namesystem.gcDeletedSnapshot(snapshotRoot, snapshotName);
+ } catch (Throwable e) {
+ LOG.warn("Failed to gcDeletedSnapshot " + deleted.getFullPathName(), e);
Review comment:
Let's change this to an error, and abort and exit the namenode if the
deletion is not possible.
I feel we should do this early to identify issues with the snapshot deletion
code.
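A sketch of what I have in mind, assuming ExitUtil.terminate is acceptable here; the exit status is arbitrary.

    } catch (Throwable e) {
      // treat a failed GC deletion as fatal so bugs in the ordered
      // deletion path surface immediately
      LOG.error("Failed to gcDeletedSnapshot " + deleted.getFullPathName(), e);
      ExitUtil.terminate(1, e);
    }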
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -267,6 +280,20 @@ public INodeDirectory getSnapshottableRoot(final INodesInPath iip)
return dir;
}
+ public void assertMarkedAsDeleted(INodesInPath iip, String snapshotName)
+ throws IOException {
+ final INodeDirectory dir = getSnapshottableRoot(iip);
+ final Snapshot.Root snapshotRoot = dir.getDirectorySnapshottableFeature()
+ .getSnapshotByName(dir, snapshotName)
+ .getRoot();
+
+ if (snapshotRoot.isMarkedAsDeleted()) {
Review comment:
Shouldn't the check here be !snapshotRoot.isMarkedAsDeleted()?
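i.e. something like this (the message text is just an example):

    if (!snapshotRoot.isMarkedAsDeleted()) {
      throw new SnapshotException("Snapshot " + snapshotName + " from "
          + iip.getPath() + " is not marked as deleted.");
    }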
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotDeletionGc.java
##########
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode.snapshot;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.util.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS;
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager.DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT;
+
+public class SnapshotDeletionGc {
+ public static final Logger LOG = LoggerFactory.getLogger(
+ SnapshotDeletionGc.class);
+
+ private final FSNamesystem namesystem;
+ private final long deletionOrderedGcPeriodMs;
+ private final AtomicReference<Timer> timer = new AtomicReference<>();
+
+ public SnapshotDeletionGc(FSNamesystem namesystem, Configuration conf) {
+ this.namesystem = namesystem;
+
+ this.deletionOrderedGcPeriodMs = conf.getLong(
+ DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+ DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS_DEFAULT);
+ LOG.info("{} = {}", DFS_NAMENODE_SNAPSHOT_DELETION_ORDERED_GC_PERIOD_MS,
+ deletionOrderedGcPeriodMs);
+ }
+
+ public void schedule() {
+ if (timer.get() != null) {
+ return;
+ }
+ final Timer t = new Timer(getClass().getSimpleName(), true);
+ if (timer.compareAndSet(null, t)) {
+ LOG.info("Schedule at fixed rate {}",
+ StringUtils.formatTime(deletionOrderedGcPeriodMs));
+ t.scheduleAtFixedRate(new GcTask(),
+ deletionOrderedGcPeriodMs, deletionOrderedGcPeriodMs);
+ }
+ }
+
+ public void cancel() {
+ final Timer t = timer.getAndSet(null);
+ if (t != null) {
+ LOG.info("cancel");
+ t.cancel();
+ }
+ }
+
+ private void gcDeletedSnapshot(String name) {
+ final Snapshot.Root deleted;
+ namesystem.readLock();
+ try {
+ deleted = namesystem.getSnapshotManager().chooseDeletedSnapshot();
+ } catch (Throwable e) {
+ LOG.error("Failed to chooseDeletedSnapshot", e);
+ throw e;
+ } finally {
+ namesystem.readUnlock();
+ }
+ if (deleted == null) {
+ LOG.trace("{}: no snapshots are marked as deleted.", name);
+ return;
+ }
+
+ final String snapshotRoot = deleted.getRootFullPathName();
+ final String snapshotName = deleted.getLocalName();
+ LOG.info("{}: delete snapshot {} from {}",
+ name, snapshotName, snapshotRoot);
+
+ try {
+ namesystem.gcDeletedSnapshot(snapshotRoot, snapshotName);
+ } catch (Throwable e) {
+ LOG.error("Failed to gcDeletedSnapshot " + deleted.getFullPathName(), e);
Review comment:
If the snapshot delete for the same snapshot name fails multiple times after
the namenode has come out of safe mode, should we exit the NN process?
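Rough sketch of the idea; MAX_GC_FAILURES and the counter are illustrative and not in the patch, AtomicInteger is already imported above, and the exit status is arbitrary.

    private static final int MAX_GC_FAILURES = 3;
    private final AtomicInteger gcFailures = new AtomicInteger();

    // ... then the last try/catch in gcDeletedSnapshot(String name) becomes:
    try {
      namesystem.gcDeletedSnapshot(snapshotRoot, snapshotName);
      gcFailures.set(0);
    } catch (Throwable e) {
      LOG.error("Failed to gcDeletedSnapshot " + deleted.getFullPathName(), e);
      if (!namesystem.isInSafeMode()
          && gcFailures.incrementAndGet() >= MAX_GC_FAILURES) {
        ExitUtil.terminate(1, e);
      }
    }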
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java
##########
@@ -382,6 +395,14 @@ public void deleteSnapshot(final INodesInPath iip, final String snapshotName,
EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE));
return;
}
+
Review comment:
I agree that the code paths are similar. However, I just wanted to discuss
this once. :) The only thing that might not work is the sleep between
consecutive snapshot deletes here. It may happen that a user-initiated
snapshot delete and a background-thread snapshot delete are not spaced
correctly in time. But that is not a big issue anyway.
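If the spacing ever becomes an issue, one illustrative option (the field and method names below are not from the patch) would be to record when the GC last deleted a snapshot and let the user-facing path consult it:

    private final AtomicLong lastGcDeleteMs = new AtomicLong();

    private boolean recentlyGcDeleted() {
      return Time.monotonicNow() - lastGcDeleteMs.get() < deletionOrderedGcPeriodMs;
    }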
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
##########
@@ -7166,6 +7175,30 @@ void deleteSnapshot(String snapshotRoot, String snapshotName,
logAuditEvent(true, operationName, rootPath, null, null);
}
+ public void gcDeletedSnapshot(String snapshotRoot, String snapshotName)
+ throws IOException {
+ final String operationName = "gcDeletedSnapshot";
+ String rootPath = null;
+ final INode.BlocksMapUpdateInfo blocksToBeDeleted;
+
+ checkOperation(OperationCategory.WRITE);
+ writeLock();
+ try {
+ checkOperation(OperationCategory.WRITE);
+ rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
+ checkNameNodeSafeMode("Cannot gcDeletedSnapshot for " + rootPath);
+
+ final long now = Time.now();
+ final INodesInPath iip = dir.resolvePath(null, snapshotRoot, DirOp.WRITE);
+ snapshotManager.assertMarkedAsDeleted(iip, snapshotName);
+ blocksToBeDeleted = FSDirSnapshotOp.deleteSnapshot(
Review comment:
Should we also add an assert inside FSDirSnapshotOp.deleteSnapshot, which is
common to both code paths, that with the ordered delete flag set to true the
snapshot being deleted is the first snapshot in the list?
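Roughly the same check that SnapshotManager#deleteSnapshot already does for the ordered case, e.g. as below. This assumes the snapshottable dir and snapshot are resolved in FSDirSnapshotOp.deleteSnapshot as well, and isSnapshotDeletionOrdered is the flag accessor I am assuming here.

    if (fsd.getFSNamesystem().getSnapshotManager().isSnapshotDeletionOrdered()) {
      final INodeDirectoryAttributes first =
          snapshottable.getDiffs().getFirstSnapshotINode();
      if (snapshot.getRoot() != first) {
        throw new IllegalStateException("Failed to delete snapshot " + snapshotName
            + ": it is not the first snapshot.");
      }
    }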
##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
##########
@@ -7166,6 +7175,30 @@ void deleteSnapshot(String snapshotRoot, String snapshotName,
logAuditEvent(true, operationName, rootPath, null, null);
}
+ public void gcDeletedSnapshot(String snapshotRoot, String snapshotName)
+ throws IOException {
+ final String operationName = "gcDeletedSnapshot";
+ String rootPath = null;
+ final INode.BlocksMapUpdateInfo blocksToBeDeleted;
+
+ checkOperation(OperationCategory.WRITE);
+ writeLock();
+ try {
+ checkOperation(OperationCategory.WRITE);
+ rootPath = Snapshot.getSnapshotPath(snapshotRoot, snapshotName);
+ checkNameNodeSafeMode("Cannot gcDeletedSnapshot for " + rootPath);
+
+ final long now = Time.now();
+ final INodesInPath iip = dir.resolvePath(null, snapshotRoot, DirOp.WRITE);
+ snapshotManager.assertMarkedAsDeleted(iip, snapshotName);
+ blocksToBeDeleted = FSDirSnapshotOp.deleteSnapshot(
Review comment:
It is already inside SnapshotManager#deleteSnapshot.