[21/50] [abbrv] hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/362272bc
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/362272bc
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/362272bc

Branch: refs/heads/YARN-7055
Commit: 362272bc3afc87e3fe15f557366c0bcd0a87a238
Parents: dc86ff4
Author: Kihwal Lee
Authored: Mon Feb 26 10:28:04 2018 -0600
Committer: Rohith Sharma K S
Committed: Fri Mar 2 11:08:28 2018 +0530

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/362272bc/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 2ecd986..94835e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -307,10 +307,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       throw new IOException("Cannot recover " + block
           + ", the following datanodes failed: " + failedList);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/362272bc/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
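The functional change in this patch is a single condition in BlockRecoveryWorker: recovery previously aborted when any datanode in the first stage failed, and now aborts only when all of them fail, so a later retry can proceed with the bad replica dropped. Below is a minimal before/after sketch of that decision using the successList/failedList names from the patch; the standalone class and the use of plain strings for datanode and block identifiers are simplifications for illustration, not the actual HDFS types.

import java.io.IOException;
import java.util.List;

public class RecoveryDecisionSketch {

  // Before HDFS-12070: one unreachable or corrupt replica made every
  // recovery attempt throw, so the file stayed open indefinitely.
  static void decideBefore(List<String> successList, List<String> failedList,
      String block) throws IOException {
    if (!failedList.isEmpty()) {
      throw new IOException("Cannot recover " + block
          + ", the following datanodes failed: " + failedList);
    }
    // ... continue recovery using successList ...
  }

  // After HDFS-12070: recovery continues with whichever replicas responded,
  // and throws only when no datanode succeeded at all.
  static void decideAfter(List<String> successList, List<String> failedList,
      String block) throws IOException {
    if (successList.isEmpty()) {
      throw new IOException("Cannot recover " + block
          + ", the following datanodes failed: " + failedList);
    }
    // ... continue recovery using successList ...
  }
}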
[58/59] [abbrv] hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/451265a8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/451265a8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/451265a8

Branch: refs/heads/HDFS-7240
Commit: 451265a83d8798624ae2a144bc58fa41db826704
Parents: 2fa7963
Author: Kihwal Lee
Authored: Mon Feb 26 10:28:04 2018 -0600
Committer: Kihwal Lee
Committed: Mon Feb 26 10:28:04 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 2ecd986..94835e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -307,10 +307,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       throw new IOException("Cannot recover " + block
           + ", the following datanodes failed: " + failedList);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 23a658c4e -> 4722cd9f3


HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.

(cherry picked from commit 4b43f2aa566322317a7f3163027bf5fd0a247207)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4722cd9f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4722cd9f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4722cd9f

Branch: refs/heads/branch-2.8
Commit: 4722cd9f35a8ff3efb106fe297d48b73c849f776
Parents: 23a658c
Author: Kihwal Lee
Authored: Mon Feb 26 11:15:06 2018 -0600
Committer: Kihwal Lee
Committed: Mon Feb 26 11:16:44 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4722cd9f/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 86fead2..b19e51d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -294,10 +294,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       StringBuilder b = new StringBuilder();
       for(DatanodeID id : failedList) {
         b.append("\n " + id);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4722cd9f/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.9 627a32375 -> a6343ff80


HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.

(cherry picked from commit 4b43f2aa566322317a7f3163027bf5fd0a247207)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/a6343ff8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/a6343ff8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/a6343ff8

Branch: refs/heads/branch-2.9
Commit: a6343ff808dcdabfa11b0f713a445cdb30474fa7
Parents: 627a323
Author: Kihwal Lee
Authored: Mon Feb 26 10:59:09 2018 -0600
Committer: Kihwal Lee
Committed: Mon Feb 26 10:59:47 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/a6343ff8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index aa36247..8d218ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -293,10 +293,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       StringBuilder b = new StringBuilder();
       for(DatanodeID id : failedList) {
         b.append("\n " + id);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/a6343ff8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 79af42f09 -> 4b43f2aa5


HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.

(cherry picked from commit 451265a83d8798624ae2a144bc58fa41db826704)

Conflicts:
	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4b43f2aa
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4b43f2aa
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4b43f2aa

Branch: refs/heads/branch-2
Commit: 4b43f2aa566322317a7f3163027bf5fd0a247207
Parents: 79af42f
Author: Kihwal Lee
Authored: Mon Feb 26 10:58:07 2018 -0600
Committer: Kihwal Lee
Committed: Mon Feb 26 10:58:07 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/4b43f2aa/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index aa36247..8d218ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -293,10 +293,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       StringBuilder b = new StringBuilder();
       for(DatanodeID id : failedList) {
         b.append("\n " + id);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/4b43f2aa/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
Repository: hadoop
Updated Branches:
  refs/heads/branch-3.0 21d4b5fd2 -> 1087b9af8


HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.

(cherry picked from commit 451265a83d8798624ae2a144bc58fa41db826704)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1087b9af
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1087b9af
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1087b9af

Branch: refs/heads/branch-3.0
Commit: 1087b9af8c34742bdcf90f2e5b809bddb9f79315
Parents: 21d4b5f
Author: Kihwal Lee
Authored: Mon Feb 26 10:30:50 2018 -0600
Committer: Kihwal Lee
Committed: Mon Feb 26 10:30:50 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1087b9af/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 2ecd986..94835e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -307,10 +307,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       throw new IOException("Cannot recover " + block
           + ", the following datanodes failed: " + failedList);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1087b9af/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
Repository: hadoop
Updated Branches:
  refs/heads/branch-3.1 cb260a2d3 -> 33f82323b


HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.

(cherry picked from commit 451265a83d8798624ae2a144bc58fa41db826704)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/33f82323
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/33f82323
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/33f82323

Branch: refs/heads/branch-3.1
Commit: 33f82323b0db22f1dc884ba59bbc367311c0
Parents: cb260a2
Author: Kihwal Lee
Authored: Mon Feb 26 10:29:28 2018 -0600
Committer: Kihwal Lee
Committed: Mon Feb 26 10:29:28 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/33f82323/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 2ecd986..94835e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -307,10 +307,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       throw new IOException("Cannot recover " + block
           + ", the following datanodes failed: " + failedList);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/33f82323/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
hadoop git commit: HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.
Repository: hadoop
Updated Branches:
  refs/heads/trunk 2fa7963c3 -> 451265a83


HDFS-12070. Failed block recovery leaves files open indefinitely and at risk for data loss. Contributed by Kihwal Lee.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/451265a8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/451265a8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/451265a8

Branch: refs/heads/trunk
Commit: 451265a83d8798624ae2a144bc58fa41db826704
Parents: 2fa7963
Author: Kihwal Lee
Authored: Mon Feb 26 10:28:04 2018 -0600
Committer: Kihwal Lee
Committed: Mon Feb 26 10:28:04 2018 -0600

----------------------------------------------------------------------
 .../server/datanode/BlockRecoveryWorker.java    |  6 +--
 .../apache/hadoop/hdfs/TestLeaseRecovery.java   | 44 ++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
index 2ecd986..94835e2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockRecoveryWorker.java
@@ -307,10 +307,8 @@ public class BlockRecoveryWorker {
       }
     }

-    // If any of the data-nodes failed, the recovery fails, because
-    // we never know the actual state of the replica on failed data-nodes.
-    // The recovery should be started over.
-    if (!failedList.isEmpty()) {
+    // Abort if all failed.
+    if (successList.isEmpty()) {
       throw new IOException("Cannot recover " + block
           + ", the following datanodes failed: " + failedList);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/451265a8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index d62194c..c82b47c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -228,6 +228,50 @@ public class TestLeaseRecovery {
   }

   /**
+   * Block/lease recovery should be retried with failed nodes from the second
+   * stage removed to avoid perpetual recovery failures.
+   */
+  @Test
+  public void testBlockRecoveryRetryAfterFailedRecovery() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+    Path file = new Path("/testBlockRecoveryRetryAfterFailedRecovery");
+    DistributedFileSystem dfs = cluster.getFileSystem();
+
+    // Create a file.
+    FSDataOutputStream out = dfs.create(file);
+    final int FILE_SIZE = 128 * 1024;
+    int count = 0;
+    while (count < FILE_SIZE) {
+      out.writeBytes("DE K9SUL");
+      count += 8;
+    }
+    out.hsync();
+
+    // Abort the original stream.
+    ((DFSOutputStream) out.getWrappedStream()).abort();
+
+    LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
+        file.toString(), 0, count);
+    ExtendedBlock block = locations.get(0).getBlock();
+
+    // Finalize one replica to simulate a partial close failure.
+    cluster.getDataNodes().get(0).getFSDataset().finalizeBlock(block, false);
+    // Delete the meta file to simulate a rename/move failure.
+    cluster.deleteMeta(0, block);
+
+    // Try to recover the lease.
+    DistributedFileSystem newDfs = (DistributedFileSystem) FileSystem
+        .newInstance(cluster.getConfiguration(0));
+    count = 0;
+    while (count++ < 15 && !newDfs.recoverLease(file)) {
+      Thread.sleep(1000);
+    }
+    // The lease should have been recovered.
+    assertTrue("File should be closed", newDfs.recoverLease(file));
+  }
+
+  /**
    * Recover the lease on a file and append file from another client.
    */
   @Test
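The new test drives recovery through the public client API, which is also how an application would close out a file abandoned by a crashed writer: call DistributedFileSystem.recoverLease and poll until it returns true. Below is a minimal standalone sketch of that usage; the file path is hypothetical and the fifteen-attempt polling budget mirrors the test rather than any required value.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class RecoverLeaseSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical file left open by a writer that died without closing it.
    Path file = new Path("/data/abandoned.log");

    DistributedFileSystem dfs =
        (DistributedFileSystem) FileSystem.get(conf);

    // recoverLease() returns true once the NameNode considers the file
    // closed; otherwise it triggers block recovery in the background,
    // so poll until the recovery completes or the budget runs out.
    boolean closed = dfs.recoverLease(file);
    for (int i = 0; i < 15 && !closed; i++) {
      Thread.sleep(1000);
      closed = dfs.recoverLease(file);
    }
    System.out.println(file + " closed: " + closed);
  }
}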