HADOOP-12634. Change Lazy Rename Pending Operation Completion of WASB to address case of potential data loss due to partial copy. Contributed by Gaurav Kanade.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/978bbdfe
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/978bbdfe
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/978bbdfe

Branch: refs/heads/HDFS-1312
Commit: 978bbdfeb2d12efd6e750da6a14849e072fb814b
Parents: 67c9780
Author: cnauroth <[email protected]>
Authored: Wed Jan 6 11:15:59 2016 -0800
Committer: cnauroth <[email protected]>
Committed: Wed Jan 6 11:15:59 2016 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  4 +++
 .../hadoop/fs/azure/NativeAzureFileSystem.java  | 34 +-------------------
 .../fs/azure/TestNativeAzureFileSystemLive.java | 22 +++++++++++++
 3 files changed, 27 insertions(+), 33 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/978bbdfe/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 1b867f0..770d37d 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -1553,6 +1553,10 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12689. S3 filesystem operations stopped working correctly
     (Matt Paduano via raviprak)
 
+    HADOOP-12634. Change Lazy Rename Pending Operation Completion of WASB to
+    address case of potential data loss due to partial copy
+    (Gaurav Kanade via cnauroth)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/978bbdfe/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java
index 73bc6b3..34791e5 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java
@@ -536,45 +536,13 @@ public class NativeAzureFileSystem extends FileSystem {
           Path dstFile = fullPath(dstKey, fileName);
           boolean srcExists = fs.exists(srcFile);
           boolean dstExists = fs.exists(dstFile);
-          if (srcExists && !dstExists) {
-
+          if(srcExists) {
             // Rename gets exclusive access (via a lease) for HBase write-ahead log
             // (WAL) file processing correctness. See the rename code for details.
             String srcName = fs.pathToKey(srcFile);
             String dstName = fs.pathToKey(dstFile);
             fs.getStoreInterface().rename(srcName, dstName, true, null);
-          } else if (srcExists && dstExists) {
-
-            // Get a lease on source to block write access.
-            String srcName = fs.pathToKey(srcFile);
-            SelfRenewingLease lease = null;
-            try {
-              lease = fs.acquireLease(srcFile);
-              // Delete the file. This will free the lease too.
-              fs.getStoreInterface().delete(srcName, lease);
-            } catch(AzureException e) {
-                String errorCode = "";
-                try {
-                  StorageException e2 = (StorageException) e.getCause();
-                  errorCode = e2.getErrorCode();
-                } catch(Exception e3) {
-                  // do nothing if cast fails
-                }
-                // If the rename already finished do nothing
-                if(!errorCode.equals("BlobNotFound")){
-                  throw e;
-                }
-            } finally {
-              try {
-                if(lease != null){
-                  lease.free();
-                }
-              } catch(StorageException e) {
-                LOG.warn("Unable to free lease because: " + e.getMessage());
-              }
-            }
           } else if (!srcExists && dstExists) {
-
             // The rename already finished, so do nothing.
             ;
           } else {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/978bbdfe/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java
index 721cb5f..6baba33 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemLive.java
@@ -24,6 +24,8 @@ import static org.junit.Assert.assertTrue;
 
 import java.util.concurrent.CountDownLatch;
 
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 
@@ -43,6 +45,26 @@ public class TestNativeAzureFileSystemLive extends
     return AzureBlobStorageTestAccount.create();
   }
 
+  @Test
+  public void testLazyRenamePendingCanOverwriteExistingFile()
+    throws Exception {
+    final String SRC_FILE_KEY = "srcFile";
+    final String DST_FILE_KEY = "dstFile";
+    Path srcPath = new Path(SRC_FILE_KEY);
+    FSDataOutputStream srcStream = fs.create(srcPath);
+    assertTrue(fs.exists(srcPath));
+    Path dstPath = new Path(DST_FILE_KEY);
+    FSDataOutputStream dstStream = fs.create(dstPath);
+    assertTrue(fs.exists(dstPath));
+    NativeAzureFileSystem nfs = (NativeAzureFileSystem)fs;
+    final String fullSrcKey = nfs.pathToKey(nfs.makeAbsolute(srcPath));
+    final String fullDstKey = nfs.pathToKey(nfs.makeAbsolute(dstPath));
+    nfs.getStoreInterface().rename(fullSrcKey, fullDstKey, true, null);
+    assertTrue(fs.exists(dstPath));
+    assertFalse(fs.exists(srcPath));
+    IOUtils.cleanup(null, srcStream);
+    IOUtils.cleanup(null, dstStream);
+  }
   /**
    * Tests fs.delete() function to delete a blob when another blob is holding a
    * lease on it. Delete if called without a lease should fail if another process
