[
https://issues.apache.org/jira/browse/HADOOP-18893?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17769140#comment-17769140
]
ASF GitHub Bot commented on HADOOP-18893:
-----------------------------------------
steveloughran commented on code in PR #6061:
URL: https://github.com/apache/hadoop/pull/6061#discussion_r1337094944
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java:
##########
@@ -347,31 +354,42 @@ private void createCheckpoint(Path trashRoot, Date date)
throws IOException {
while (true) {
try {
fs.rename(current, checkpoint, Rename.NONE);
- LOG.info("Created trash checkpoint: " + checkpoint.toUri().getPath());
+ LOG.info("Created trash checkpoint: {}", checkpoint.toUri().getPath());
break;
} catch (FileAlreadyExistsException e) {
if (++attempt > 1000) {
- throw new IOException("Failed to checkpoint trash: " + checkpoint);
+ throw new IOException("Failed to checkpoint trash: " + checkpoint,
e);
}
checkpoint = checkpointBase.suffix("-" + attempt);
}
}
}
- private void deleteCheckpoint(Path trashRoot, boolean deleteImmediately)
+ /**
+ * Delete trash directories under a checkpoint older than the interval,
+ * or, if {@code deleteImmediately} is true, all entries.
+ * It is not an error if invoked on a trash root which doesn't exist.
+ * @param trashRoot trash root.
+ * @param deleteImmediately should all entries be deleted
+ * @return the number of entries deleted.
+ * @throws IOException failure in listing or delete() calls
+ */
+ protected int deleteCheckpoint(Path trashRoot, boolean deleteImmediately)
throws IOException {
- LOG.info("TrashPolicyDefault#deleteCheckpoint for trashRoot: " +
trashRoot);
+ LOG.info("TrashPolicyDefault#deleteCheckpoint for trashRoot: {}",
trashRoot);
- FileStatus[] dirs = null;
+ RemoteIterator<FileStatus> dirs;
try {
- dirs = fs.listStatus(trashRoot); // scan trash sub-directories
+ dirs = fs.listStatusIterator(trashRoot); // scan trash sub-directories
} catch (FileNotFoundException fnfe) {
- return;
+ return 0;
Review Comment:
note that the listStatus calls may not raise the FNFE until the next/hasNext
call, so catch it in the iterator below too
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java:
##########
@@ -139,23 +191,38 @@ public Path getCurrentTrashDir(Path path) throws
IOException {
@Deprecated
public static TrashPolicy getInstance(Configuration conf, FileSystem fs,
Path home) {
Class<? extends TrashPolicy> trashClass = conf.getClass(
- "fs.trash.classname", TrashPolicyDefault.class, TrashPolicy.class);
+ FS_TRASH_CLASSNAME, TrashPolicyDefault.class, TrashPolicy.class);
TrashPolicy trash = ReflectionUtils.newInstance(trashClass, conf);
trash.initialize(conf, fs, home); // initialize TrashPolicy
return trash;
}
/**
- * Get an instance of the configured TrashPolicy based on the value
- * of the configuration parameter fs.trash.classname.
- *
- * @param conf the configuration to be used
+ * Get an instance of the configured TrashPolicy based on the value of
+ * the configuration parameter
+ * <ol>
+ * <li>{@code fs.${fs.getUri().getScheme()}.trash.classname}</li>
+ * <li>{@code fs.trash.classname}</li>
+ * </ol>
+ * The configuration passed in is used to look up both values and load
+ * in the policy class, not that of the FileSystem instance.
+ * @param conf the configuration to be used for lookup and classloading
* @param fs the file system to be used
* @return an instance of TrashPolicy
*/
+ @SuppressWarnings("ClassReferencesSubclass")
public static TrashPolicy getInstance(Configuration conf, FileSystem fs) {
Review Comment:
does the conf come from the filesystem? as if so it'd let us do per-bucket
stuff. if not, well, it's possibly too late
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java:
##########
@@ -3450,23 +3454,29 @@ public Path getTrashRoot(Path path) {
public Collection<FileStatus> getTrashRoots(boolean allUsers) {
Path userHome = new Path(getHomeDirectory().toUri().getPath());
List<FileStatus> ret = new ArrayList<>();
+ // an operation to look up a path status and add it to the return list
Review Comment:
nice bit of work
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ATrash.java:
##########
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.EmptyTrashPolicy;
+import org.apache.hadoop.fs.Trash;
+
+/**
+ * Test Trash for S3AFilesystem.
+ */
+public class TestS3ATrash extends AbstractS3ATestBase {
+
+ /**
+ * Test default Trash Policy for S3AFilesystem is Empty.
+ */
+ @Test
+ public void testTrashSetToEmptyTrashPolicy() throws IOException {
+ Configuration conf = new Configuration();
+ Trash trash = new Trash(getFileSystem(), conf);
+ assertEquals("Mismatch in Trash Policy set by the config",
+ trash.getTrashPolicy().getClass(), EmptyTrashPolicy.class);
Review Comment:
prefer assertJ
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java:
##########
@@ -347,31 +354,42 @@ private void createCheckpoint(Path trashRoot, Date date)
throws IOException {
while (true) {
try {
fs.rename(current, checkpoint, Rename.NONE);
- LOG.info("Created trash checkpoint: " + checkpoint.toUri().getPath());
+ LOG.info("Created trash checkpoint: {}", checkpoint.toUri().getPath());
break;
} catch (FileAlreadyExistsException e) {
if (++attempt > 1000) {
Review Comment:
lets make this a constant
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java:
##########
@@ -193,7 +193,7 @@ public boolean moveToTrash(Path path) throws IOException {
// move to current trash
fs.rename(path, trashPath,
Review Comment:
should check return value here. "false" isn't that useful, but it does mean
the rename failed
##########
hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAbfsTrash.java:
##########
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.azurebfs;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.EmptyTrashPolicy;
+import org.apache.hadoop.fs.Trash;
+
+/**
+ * Tests to verify behaviour of Trash with AzureBlobFileSystem.
+ */
+public class TestAbfsTrash extends AbstractAbfsIntegrationTest {
+
+ public TestAbfsTrash() throws Exception {}
+
+  /**
+   * Test default Trash Policy for AzureBlobFileSystem is Empty.
+   */
+ @Test
+ public void testTrashSetToEmptyTrashPolicy() throws IOException {
+ Configuration conf = new Configuration();
+ Trash trash = new Trash(getFileSystem(), conf);
Review Comment:
use conf from filesystem
##########
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ATrash.java:
##########
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.EmptyTrashPolicy;
+import org.apache.hadoop.fs.Trash;
+
+/**
+ * Test Trash for S3AFilesystem.
+ */
+public class TestS3ATrash extends AbstractS3ATestBase {
Review Comment:
all subclasses of AbstractS3ATestBase are itests; you may not have noticed
it, but it is true.
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java:
##########
@@ -347,31 +354,42 @@ private void createCheckpoint(Path trashRoot, Date date)
throws IOException {
while (true) {
try {
fs.rename(current, checkpoint, Rename.NONE);
Review Comment:
again, handle failure here
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java:
##########
@@ -393,16 +411,34 @@ private void deleteCheckpoint(Path trashRoot, boolean
deleteImmediately)
}
if (((now - deletionInterval) > time) || deleteImmediately) {
- if (fs.delete(path, true)) {
- LOG.info("Deleted trash checkpoint: "+dir);
- } else {
- LOG.warn("Couldn't delete checkpoint: " + dir + " Ignoring.");
- }
+ deleteCheckpoint(path);
}
}
+ return counter;
}
- private long getTimeFromCheckpoint(String name) throws ParseException {
+ /**
+ * Delete a checkpoint
+ * @param path path to delete
+ * @throws IOException IO exception raised in delete call.
+ */
+ protected void deleteCheckpoint(final Path path) throws IOException {
+ String dir = path.toUri().getPath();
+ if (getFileSystem().delete(path, true)) {
+ LOG.info("Deleted trash checkpoint: {}", path);
+ } else {
+ LOG.warn("Couldn't delete checkpoint: {}. Ignoring.", path);
Review Comment:
add a check to see if path exists. if it doesn't exist any more, just log at
info rather than warn
##########
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java:
##########
@@ -393,16 +411,34 @@ private void deleteCheckpoint(Path trashRoot, boolean
deleteImmediately)
}
if (((now - deletionInterval) > time) || deleteImmediately) {
- if (fs.delete(path, true)) {
- LOG.info("Deleted trash checkpoint: "+dir);
- } else {
- LOG.warn("Couldn't delete checkpoint: " + dir + " Ignoring.");
- }
+ deleteCheckpoint(path);
}
}
+ return counter;
}
- private long getTimeFromCheckpoint(String name) throws ParseException {
+ /**
+ * Delete a checkpoint
+ * @param path path to delete
+ * @throws IOException IO exception raised in delete call.
+ */
+ protected void deleteCheckpoint(final Path path) throws IOException {
+ String dir = path.toUri().getPath();
+ if (getFileSystem().delete(path, true)) {
+ LOG.info("Deleted trash checkpoint: {}", path);
+ } else {
+ LOG.warn("Couldn't delete checkpoint: {}. Ignoring.", path);
+ }
+ }
+
+ /**
+   * Parse the name of a checkpoint to extract its timestamp.
+ * Uses the Hadoop 0.23 checkpoint as well as the older version (!).
Review Comment:
could be time to return the older version code if it is over complex
> Make Trash Policy pluggable for different FileSystems
> -----------------------------------------------------
>
> Key: HADOOP-18893
> URL: https://issues.apache.org/jira/browse/HADOOP-18893
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs, fs/azure, fs/s3
> Reporter: Mehakmeet Singh
> Assignee: Mehakmeet Singh
> Priority: Major
> Labels: pull-request-available
>
> Add capability for Trash Policy to be pluggable using a property leveraging
> the schema of different filesystems.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]