hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 75a303b54 -> 829959a8f

HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/829959a8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/829959a8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/829959a8

Branch: refs/heads/branch-2.7
Commit: 829959a8ff1c034dd3116328f9a4a4d2f1d92f00
Parents: 75a303b
Author: Xiao Chen
Authored: Tue Feb 20 23:26:21 2018 -0800
Committer: Xiao Chen
Committed: Tue Feb 20 23:27:20 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java      | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java | 71
 .../datanode/fsdataset/impl/FsDatasetImpl.java | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java | 23 +++
 .../namenode/TestListCorruptFileBlocks.java    |  4 +-
 5 files changed, 127 insertions(+), 45 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hadoop/blob/829959a8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index c042190..71e4804 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -170,8 +170,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
 
   /**
    * Constructor
    *
@@ -241,12 +246,6 @@ class BlockSender implements java.io.Closeable {
       synchronized(datanode.data) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -254,6 +253,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -329,12 +332,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }
 
       /*
@@ -410,6 +409,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    // Load last checksum in case the replica is being written concurrently
+    final long replicaVisibleLength =
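The diff above is cut off mid-method by the archive. Pieced together from the surviving comments, a plausible sketch of how the remainder of getPartialChunkChecksumForFinalized could read follows; the FinalizedReplica helpers (getLastPartialChunkChecksum, loadLastPartialChunkChecksum) and the exact control flow are assumptions for illustration, not text recovered from the patch.

  // Sketch only: the FinalizedReplica helper names are assumed; `replica`
  // and CHUNK_SIZE are the BlockSender fields visible in the hunks above.
  private ChunkChecksum getPartialChunkChecksumForFinalized(
      FinalizedReplica finalized) throws IOException {
    // Load last checksum in case the replica is being written concurrently.
    final long replicaVisibleLength = replica.getVisibleLength();
    if (replicaVisibleLength % CHUNK_SIZE != 0
        && finalized.getLastPartialChunkChecksum() == null) {
      // The replica ends in a partial chunk and nothing is cached yet:
      // read the last checksum from the metadata file once and memoize it.
      finalized.loadLastPartialChunkChecksum();
    }
    return new ChunkChecksum(finalized.getVisibleLength(),
        finalized.getLastPartialChunkChecksum());
  }

Either way the caller gets a ChunkChecksum pairing the visible length with the checksum of the trailing partial chunk, which is what the hunk at @@ -254,6 +253,10 @@ stores into chunkChecksum.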
hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.8 f41ced7f5 -> 0588fde3a

HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

(cherry picked from commit 49ed7d7fc9cd9ea280460cc11738df81c492be68)

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/0588fde3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/0588fde3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/0588fde3

Branch: refs/heads/branch-2.8
Commit: 0588fde3a272288fb5ed94c28b364c4d8fae5d55
Parents: f41ced7
Author: Xiao Chen
Authored: Mon Feb 19 19:14:38 2018 -0800
Committer: Xiao Chen
Committed: Mon Feb 19 19:19:13 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java      | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java | 72
 .../datanode/fsdataset/impl/FsDatasetImpl.java | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java | 23 +++
 .../namenode/TestListCorruptFileBlocks.java    |  4 +-
 5 files changed, 127 insertions(+), 46 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hadoop/blob/0588fde3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index ec16e94..ec84e6e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -181,8 +181,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
 
   /**
    * Constructor
    *
@@ -252,12 +257,6 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -265,6 +264,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -353,12 +356,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }
 
       /*
@@ -434,6 +433,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    //
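For context on the CHUNK_SIZE comment in the hunk above: because reads always start at a checksum chunk boundary, a misaligned request is widened by at most one chunk, so a small chunk (512 bytes, one classical disk sector) bounds the extra IO. The snippet below is illustrative arithmetic only, not part of the patch:

  // Illustrative only: the boundary arithmetic the comment describes.
  static final long CHUNK_SIZE = 512;

  // Round a requested offset down to the enclosing chunk boundary; at most
  // CHUNK_SIZE - 1 extra bytes are read and sent before the requested byte.
  static long chunkAlignedStart(long startOffset) {
    return startOffset - (startOffset % CHUNK_SIZE);
  }

  // A replica whose visible length is not a multiple of CHUNK_SIZE ends in
  // a partial chunk; that trailing chunk's checksum is what this patch
  // caches on the FinalizedReplica instead of re-reading it from disk.
  static boolean endsInPartialChunk(long visibleLength) {
    return visibleLength % CHUNK_SIZE != 0;
  }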
hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.9 c753df4b6 -> c16b91fde

HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

(cherry picked from commit 49ed7d7fc9cd9ea280460cc11738df81c492be68)

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c16b91fd
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c16b91fd
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c16b91fd

Branch: refs/heads/branch-2.9
Commit: c16b91fde0baf16c5f595a9e12d5c5867524eb18
Parents: c753df4
Author: Xiao Chen
Authored: Mon Feb 19 19:14:38 2018 -0800
Committer: Xiao Chen
Committed: Mon Feb 19 19:19:02 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java      | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java | 72
 .../datanode/fsdataset/impl/FsDatasetImpl.java | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java | 23 +++
 .../namenode/TestListCorruptFileBlocks.java    |  4 +-
 5 files changed, 127 insertions(+), 46 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c16b91fd/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 210dbdd..ff81b5a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -175,8 +175,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
 
   /**
    * Constructor
    *
@@ -250,12 +255,6 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -263,6 +262,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -349,12 +352,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }
 
       /*
@@ -428,6 +427,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    //
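A side note on the context lines: the branch-2.8, branch-2.9, and branch-2 hunks guard the replica lookup with try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()), where branch-2.7 still uses synchronized(datanode.data). A minimal sketch of that try-with-resources locking pattern, as a simplified stand-in for Hadoop's org.apache.hadoop.util.AutoCloseableLock:

  import java.util.concurrent.locks.ReentrantLock;

  // Simplified stand-in for org.apache.hadoop.util.AutoCloseableLock:
  // close() releases the lock, so try-with-resources cannot leak it on
  // any exit path, checked or unchecked.
  class AutoCloseableLock implements AutoCloseable {
    private final ReentrantLock lock = new ReentrantLock();

    AutoCloseableLock acquire() {
      lock.lock();
      return this;
    }

    @Override
    public void close() {
      lock.unlock();
    }
  }

Usage mirrors the hunk: try (AutoCloseableLock l = datasetLock.acquire()) { ... } reads the replica state and releases the dataset lock automatically.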
hadoop git commit: HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 a278ad6b9 -> 49ed7d7fc

HDFS-11187. Optimize disk access for last partial chunk checksum of Finalized replica. Contributed by Gabor Bota, Wei-Chiu Chuang.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/49ed7d7f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/49ed7d7f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/49ed7d7f

Branch: refs/heads/branch-2
Commit: 49ed7d7fc9cd9ea280460cc11738df81c492be68
Parents: a278ad6
Author: Xiao Chen
Authored: Mon Feb 19 19:14:38 2018 -0800
Committer: Xiao Chen
Committed: Mon Feb 19 19:15:02 2018 -0800

--
 .../hdfs/server/datanode/BlockSender.java      | 56 +++
 .../hdfs/server/datanode/FinalizedReplica.java | 72
 .../datanode/fsdataset/impl/FsDatasetImpl.java | 18 -
 .../org/apache/hadoop/hdfs/MiniDFSCluster.java | 23 +++
 .../namenode/TestListCorruptFileBlocks.java    |  4 +-
 5 files changed, 127 insertions(+), 46 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hadoop/blob/49ed7d7f/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
--
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
index 210dbdd..ff81b5a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
@@ -175,8 +175,13 @@ class BlockSender implements java.io.Closeable {
    * See {{@link BlockSender#isLongRead()}
    */
   private static final long LONG_READ_THRESHOLD_BYTES = 256 * 1024;
-
+  // The number of bytes per checksum here determines the alignment
+  // of reads: we always start reading at a checksum chunk boundary,
+  // even if the checksum type is NULL. So, choosing too big of a value
+  // would risk sending too much unnecessary data. 512 (1 disk sector)
+  // is likely to result in minimal extra IO.
+  private static final long CHUNK_SIZE = 512;
 
   /**
    * Constructor
    *
@@ -250,12 +255,6 @@ class BlockSender implements java.io.Closeable {
       try(AutoCloseableLock lock = datanode.data.acquireDatasetLock()) {
         replica = getReplica(block, datanode);
         replicaVisibleLength = replica.getVisibleLength();
-        if (replica instanceof FinalizedReplica) {
-          // Load last checksum in case the replica is being written
-          // concurrently
-          final FinalizedReplica frep = (FinalizedReplica) replica;
-          chunkChecksum = frep.getLastChecksumAndDataLen();
-        }
       }
       // if there is a write in progress
       if (replica instanceof ReplicaBeingWritten) {
@@ -263,6 +262,10 @@ class BlockSender implements java.io.Closeable {
         waitForMinLength(rbw, startOffset + length);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
+      if (replica instanceof FinalizedReplica) {
+        chunkChecksum = getPartialChunkChecksumForFinalized(
+            (FinalizedReplica)replica);
+      }
 
       if (replica.getGenerationStamp() < block.getGenerationStamp()) {
         throw new IOException("Replica gen stamp < block genstamp, block="
@@ -349,12 +352,8 @@ class BlockSender implements java.io.Closeable {
         }
       }
       if (csum == null) {
-        // The number of bytes per checksum here determines the alignment
-        // of reads: we always start reading at a checksum chunk boundary,
-        // even if the checksum type is NULL. So, choosing too big of a value
-        // would risk sending too much unnecessary data. 512 (1 disk sector)
-        // is likely to result in minimal extra IO.
-        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512);
+        csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL,
+            (int)CHUNK_SIZE);
       }
 
       /*
@@ -428,6 +427,37 @@ class BlockSender implements java.io.Closeable {
     }
   }
 
+  private ChunkChecksum getPartialChunkChecksumForFinalized(
+      FinalizedReplica finalized) throws IOException {
+    // There are a number of places in the code base where a finalized replica
+    // object is created. If last partial checksum is loaded whenever a
+    // finalized replica is created, it would increase latency in DataNode
+    // initialization. Therefore, the last partial chunk checksum is loaded
+    // lazily.
+
+    // Load last checksum in case the replica is being written concurrently
+
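None of these messages reaches the FinalizedReplica.java half of the patch (71-72 changed lines in each diffstat), which is where the lazily loaded checksum would actually be read from disk. As a rough, hypothetical sketch of that load, assuming the standard HDFS meta-file layout (a BlockMetadataHeader followed by one fixed-size checksum record per chunk) and that BlockMetadataHeader.readHeader(File) is available:

  import java.io.File;
  import java.io.IOException;
  import java.io.RandomAccessFile;

  import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
  import org.apache.hadoop.util.DataChecksum;

  // Hypothetical reconstruction, not the patch itself: read the checksum of
  // the last (partial) data chunk from the replica's metadata file.
  class LastPartialChunkChecksumLoader {
    static byte[] load(File blockFile, File metaFile) throws IOException {
      DataChecksum dcs =
          BlockMetadataHeader.readHeader(metaFile).getChecksum();
      final int checksumSize = dcs.getChecksumSize();
      final int bytesPerChecksum = dcs.getBytesPerChecksum();
      final long onDiskLen = blockFile.length();
      if (onDiskLen % bytesPerChecksum == 0) {
        // The last chunk is complete; a finalized replica's complete chunks
        // are immutable, so there is no partial-chunk checksum to cache.
        return null;
      }
      // One fixed-size checksum record per full chunk precedes the record
      // for the trailing partial chunk.
      long offset = BlockMetadataHeader.getHeaderSize()
          + (onDiskLen / bytesPerChecksum) * checksumSize;
      byte[] lastChecksum = new byte[checksumSize];
      try (RandomAccessFile raf = new RandomAccessFile(metaFile, "r")) {
        raf.seek(offset);
        raf.readFully(lastChecksum);
      }
      return lastChecksum;
    }
  }

Caching this one record on the FinalizedReplica is the whole point of the change: BlockSender no longer has to open and seek the metadata file for every read that ends in a partial chunk.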
512 (1 disk sector) -// is likely to result in minimal extra IO. -csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 512); +csum = DataChecksum.newDataChecksum(DataChecksum.Type.NULL, +(int)CHUNK_SIZE); } /* @@ -428,6 +427,37 @@ class BlockSender implements java.io.Closeable { } } + private ChunkChecksum getPartialChunkChecksumForFinalized( + FinalizedReplica finalized) throws IOException { +// There are a number of places in the code base where a finalized replica +// object is created. If last partial checksum is loaded whenever a +// finalized replica is created, it would increase latency in DataNode +// initialization. Therefore, the last partial chunk checksum is loaded +// lazily. + +// Load last checksum in case the replica is being written concurrently +