hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

2018-02-20 Thread xiao
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 75a303b54 -> 829959a8f


HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized 
replica. Contributed by Gabor Bota, Wei-Chiu Chuang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/829959a8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/829959a8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/829959a8

Branch: refs/heads/branch-2.7
Commit: 829959a8ff1c034dd3116328f9a4a4d2f1d92f00
Parents: 75a303b
Author: Xiao Chen 
Authored: Tue Feb 20 23:26:21 2018 -0800
Committer: Xiao Chen 
Committed: Tue Feb 20 23:27:20 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java   | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java  | 71 
 .../datanode/fsdataset/impl/FsDatasetImpl.java  | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  | 23 +++
 .../namenode/TestListCorruptFileBlocks.java |  4 +-
 5 files changed, 127 insertions(+), 45 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/829959a8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index c042190..71e4804 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -170,8 +170,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-  
 
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
   /**
    * Constructor
    * 
@@ -241,12 +246,6 @@ class BlockSender implements java.io.Closeable {
       synchronized(datanode.data) { 
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -254,6 +253,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -329,12 +332,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }
 
       /*
@@ -410,6 +409,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    // Load last checksum in case the replica is being written concurrently
+    final long replicaVisibleLength = 
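
The body of the new getPartialChunkChecksumForFinalized() is cut off above, but its comment spells out the design: do not load the last partial chunk checksum every time a FinalizedReplica object is created (that would slow DataNode initialization); load it lazily, on the read path, and only when the block does not end on a chunk boundary. A minimal sketch of that lazy pattern follows; it is an illustration only, with made-up names (LazyPartialChecksumHolder, readChecksumFromMetaFile) standing in for the real FinalizedReplica members.

import java.io.IOException;

// Hypothetical stand-in for FinalizedReplica: the checksum of the trailing
// partial chunk is cached and only read from disk the first time a reader
// asks for it, never during replica construction.
class LazyPartialChecksumHolder {
  private static final long CHUNK_SIZE = 512;  // mirrors BlockSender.CHUNK_SIZE
  private final long visibleLength;
  private byte[] lastPartialChunkChecksum;     // null until first use

  LazyPartialChecksumHolder(long visibleLength) {
    this.visibleLength = visibleLength;
  }

  // Called from the read path (where BlockSender builds its ChunkChecksum),
  // so DataNode startup never pays for the meta-file read.
  synchronized byte[] getOrLoadLastPartialChunkChecksum() throws IOException {
    if (visibleLength % CHUNK_SIZE == 0) {
      return null;  // block ends on a chunk boundary: no partial chunk to track
    }
    if (lastPartialChunkChecksum == null) {
      lastPartialChunkChecksum = readChecksumFromMetaFile();
    }
    return lastPartialChunkChecksum;
  }

  // Placeholder for the actual meta-file read; one possible implementation is
  // sketched at the end of this thread.
  private byte[] readChecksumFromMetaFile() throws IOException {
    return new byte[4];  // e.g. a single CRC32 checksum
  }
}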

hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

2018-02-19 Thread xiao
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 f41ced7f5 -> 0588fde3a


HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized 
replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

(cherry picked from commit 49ed7d7fc9cd9ea280460cc11738df81c492be68)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0588fde3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0588fde3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0588fde3

Branch: refs/heads/branch-2.8
Commit: 0588fde3a272288fb5ed94c28b364c4d8fae5d55
Parents: f41ced7
Author: Xiao Chen 
Authored: Mon Feb 19 19:14:38 2018 -0800
Committer: Xiao Chen 
Committed: Mon Feb 19 19:19:13 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java   | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java  | 72 
 .../datanode/fsdataset/impl/FsDatasetImpl.java  | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  | 23 +++
 .../namenode/TestListCorruptFileBlocks.java |  4 +-
 5 files changed, 127 insertions(+), 46 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/0588fde3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index ec16e94..ec84e6e 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -181,8 +181,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-  
 
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
   /**
    * Constructor
    * 
@@ -252,12 +257,6 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -265,6 +264,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -353,12 +356,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }
 
       /*
@@ -434,6 +433,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    // 
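
The CHUNK_SIZE comment above (and the identical one removed from the csum == null branch) is about read alignment: BlockSender always starts sending from a checksum chunk boundary, so a larger bytes-per-checksum value means more unrequested leading bytes per read, which is why the NULL-checksum case stays at 512 bytes. A rough sketch of that arithmetic, in plain Java with illustrative values rather than code from this patch:

// Align a requested read to the checksum chunk grid: the send starts at the
// chunk boundary at or before startOffset, so the client receives up to
// (bytesPerChecksum - 1) bytes it did not ask for. With 512-byte chunks the
// waste is at most 511 bytes; with 64 KB chunks it could be almost 64 KB.
public class ChunkAlignmentSketch {
  public static void main(String[] args) {
    long startOffset = 1000;      // offset the client asked for
    long bytesPerChecksum = 512;  // CHUNK_SIZE in this patch

    long alignedStart = (startOffset / bytesPerChecksum) * bytesPerChecksum;
    long extraLeadingBytes = startOffset - alignedStart;

    System.out.println("aligned start = " + alignedStart);            // 512
    System.out.println("extra leading bytes = " + extraLeadingBytes); // 488
  }
}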

hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

2018-02-19 Thread xiao
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.9 c753df4b6 -> c16b91fde


HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized 
replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

(cherry picked from commit 49ed7d7fc9cd9ea280460cc11738df81c492be68)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c16b91fd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c16b91fd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c16b91fd

Branch: refs/heads/branch-2.9
Commit: c16b91fde0baf16c5f595a9e12d5c5867524eb18
Parents: c753df4
Author: Xiao Chen 
Authored: Mon Feb 19 19:14:38 2018 -0800
Committer: Xiao Chen 
Committed: Mon Feb 19 19:19:02 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java   | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java  | 72 
 .../datanode/fsdataset/impl/FsDatasetImpl.java  | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  | 23 +++
 .../namenode/TestListCorruptFileBlocks.java |  4 +-
 5 files changed, 127 insertions(+), 46 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/c16b91fd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 210dbdd..ff81b5a 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -175,8 +175,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-  
 
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
   /**
    * Constructor
    * 
@@ -250,12 +255,6 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -263,6 +262,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -349,12 +352,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
      }
 
       /*
@@ -428,6 +427,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    // 

hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

2018-02-19 Thread xiao
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 a278ad6b9 -> 49ed7d7fc


HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized 
replica. Contributed by Gabor Bota, Wei-Chiu Chuang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/49ed7d7f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/49ed7d7f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/49ed7d7f

Branch: refs/heads/branch-2
Commit: 49ed7d7fc9cd9ea280460cc11738df81c492be68
Parents: a278ad6
Author: Xiao Chen 
Authored: Mon Feb 19 19:14:38 2018 -0800
Committer: Xiao Chen 
Committed: Mon Feb 19 19:15:02 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java   | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java  | 72 
 .../datanode/fsdataset/impl/FsDatasetImpl.java  | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  | 23 +++
 .../namenode/TestListCorruptFileBlocks.java |  4 +-
 5 files changed, 127 insertions(+), 46 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/49ed7d7f/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 210dbdd..ff81b5a 100644
--- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -175,8 +175,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-  
 
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
   /**
    * Constructor
    * 
@@ -250,12 +255,6 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -263,6 +262,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -349,12 +352,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }
 
       /*
@@ -428,6 +427,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    // Load last checksum in case the replica is being written concurrently
+
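
For context on what the deferred disk access involves: the last partial chunk checksum sits at the tail of the block's .meta file, after the header and the checksums of all full chunks. Below is a standalone sketch of that read, assuming the usual header-plus-one-checksum-per-chunk meta layout; the method and parameter names are illustrative and are not the FinalizedReplica/FsDatasetImpl code from this change.

import java.io.IOException;
import java.io.RandomAccessFile;

// Sketch: read the checksum covering the last (partial) chunk of a block,
// given a meta file laid out as <header><one checksum per chunk>. Header
// size, chunk size and checksum width are passed in because the real values
// would come from the meta file header and the configured DataChecksum.
final class LastPartialChunkChecksumSketch {
  static byte[] readLastPartialChunkChecksum(String metaFilePath,
      long blockLenOnDisk, int bytesPerChecksum, int checksumSize,
      long metaHeaderSize) throws IOException {
    if (blockLenOnDisk % bytesPerChecksum == 0) {
      // The last chunk is complete, so there is no partial checksum to keep.
      return null;
    }
    // One checksum per chunk: the partial chunk's checksum follows the
    // checksums of the (blockLenOnDisk / bytesPerChecksum) full chunks.
    long offset = metaHeaderSize
        + (blockLenOnDisk / bytesPerChecksum) * checksumSize;
    byte[] checksum = new byte[checksumSize];
    try (RandomAccessFile raf = new RandomAccessFile(metaFilePath, "r")) {
      raf.seek(offset);
      raf.readFully(checksum);
    }
    return checksum;
  }
}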