[2/2] hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Wei-Chiu Chuang.

2018-02-06 Thread xiao
HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized 
replica. Contributed by Wei-Chiu Chuang.

(cherry picked from commit 2021f4bdce3b27c46edaad198f0007a26a8a1391)

Conflicts:

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaBuilder.java


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/29a3b64e
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/29a3b64e
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/29a3b64e

Branch: refs/heads/branch-3.0
Commit: 29a3b64ec078a7131f6fc732b98b85e50c33264c
Parents: d436c40
Author: Wei-Chiu Chuang 
Authored: Fri Feb 2 17:15:26 2018 -0800
Committer: Xiao Chen 
Committed: Tue Feb 6 22:57:15 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java   | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java  | 74 
 .../hdfs/server/datanode/ReplicaBuilder.java| 11 ++-
 .../datanode/fsdataset/impl/FsDatasetImpl.java  |  1 +
 .../datanode/fsdataset/impl/FsVolumeImpl.java   | 21 --
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  | 23 ++
 .../namenode/TestListCorruptFileBlocks.java |  4 +-
 7 files changed, 140 insertions(+), 50 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/29a3b64e/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 3ff5c75..268007f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -175,8 +175,13 @@ class BlockSender implements java.io.Closeable {
    * See {@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-
 
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
   /**
    * Constructor
    *
@@ -250,18 +255,16 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       if (replica.getState() == ReplicaState.RBW) {
         final ReplicaInPipeline rbw = (ReplicaInPipeline) replica;
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }

       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -348,12 +351,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }

       /*
@@ -427,6 +426,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Wei-Chiu Chuang.

2018-02-02 Thread weichiu
Repository: hadoop
Updated Branches:
  refs/heads/trunk c7101fe21 -> 2021f4bdc


HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized 
replica. Contributed by Wei-Chiu Chuang.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2021f4bd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2021f4bd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2021f4bd

Branch: refs/heads/trunk
Commit: 2021f4bdce3b27c46edaad198f0007a26a8a1391
Parents: c7101fe
Author: Wei-Chiu Chuang 
Authored: Fri Feb 2 17:15:26 2018 -0800
Committer: Wei-Chiu Chuang 
Committed: Fri Feb 2 17:18:42 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java   | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java  | 74 
 .../hdfs/server/datanode/ReplicaBuilder.java| 11 ++-
 .../datanode/fsdataset/impl/FsDatasetImpl.java  |  1 +
 .../datanode/fsdataset/impl/FsVolumeImpl.java   | 21 --
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java  | 23 ++
 .../namenode/TestListCorruptFileBlocks.java |  4 +-
 7 files changed, 140 insertions(+), 50 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2021f4bd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 3ff5c75..268007f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -175,8 +175,13 @@ class BlockSender implements java.io.Closeable {
    * See {@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-
 
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
   /**
    * Constructor
    *
@@ -250,18 +255,16 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       if (replica.getState() == ReplicaState.RBW) {
         final ReplicaInPipeline rbw = (ReplicaInPipeline) replica;
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }

       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -348,12 +351,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }

       /*
@@ -427,6 +426,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    // Load last checksum in case the replica is being written concurrently
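
The CHUNK_SIZE comment in the first hunk above can be illustrated with a small, hypothetical Java sketch of the alignment arithmetic: reads begin on a checksum chunk boundary, so a requested byte range is widened to chunk multiples. The helper names here are invented; BlockSender does not expose such methods.

// Hypothetical illustration of chunk-aligned reads; not BlockSender code.
class ChunkAlignmentSketch {
  private static final long CHUNK_SIZE = 512; // one disk sector

  // Round the start offset down to the enclosing chunk boundary.
  static long alignedStart(long startOffset) {
    return (startOffset / CHUNK_SIZE) * CHUNK_SIZE;
  }

  // Round the end of the requested range up to the next chunk boundary.
  static long alignedEnd(long startOffset, long length) {
    long end = startOffset + length;
    return ((end + CHUNK_SIZE - 1) / CHUNK_SIZE) * CHUNK_SIZE;
  }

  public static void main(String[] args) {
    // A 100-byte read at offset 700 actually covers chunks [512, 1024):
    System.out.println(alignedStart(700));    // 512
    System.out.println(alignedEnd(700, 100)); // 1024
  }
}

A larger CHUNK_SIZE would grow the wasted bytes at each end proportionally, which is why the comment favors 512: at most one disk sector of extra IO per boundary.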