Repository: hadoop Updated Branches: refs/heads/trunk 8d31ddcfe -> 6d5e87aec
HADOOP-15507. Add MapReduce counters about EC bytes read. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6d5e87ae Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6d5e87ae Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6d5e87ae Branch: refs/heads/trunk Commit: 6d5e87aec2f615ed265dc495873bf53ee7d2ace2 Parents: 8d31ddc Author: Xiao Chen <[email protected]> Authored: Mon Jun 4 21:13:17 2018 -0700 Committer: Xiao Chen <[email protected]> Committed: Mon Jun 4 21:13:47 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/hadoop/fs/FileSystem.java | 34 ++++++++++++++++++++ .../hadoop/fs/FileSystemStorageStatistics.java | 5 ++- .../fs/TestFileSystemStorageStatistics.java | 6 +++- .../java/org/apache/hadoop/hdfs/DFSClient.java | 6 ++++ .../org/apache/hadoop/hdfs/DFSInputStream.java | 4 +++ .../org/apache/hadoop/hdfs/ReaderStrategy.java | 8 +++++ .../java/org/apache/hadoop/mapred/Task.java | 14 +++++++- .../hadoop/mapreduce/FileSystemCounter.java | 1 + .../mapreduce/FileSystemCounter.properties | 1 + 9 files changed, 76 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 707b921..c309941 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -3605,6 +3605,7 @@ public abstract class FileSystem extends Configured implements Closeable { private volatile long bytesReadDistanceOfOneOrTwo; private volatile long bytesReadDistanceOfThreeOrFour; private volatile long bytesReadDistanceOfFiveOrLarger; + private volatile long bytesReadErasureCoded; /** * Add another StatisticsData object to this one. @@ -3621,6 +3622,7 @@ public abstract class FileSystem extends Configured implements Closeable { other.bytesReadDistanceOfThreeOrFour; this.bytesReadDistanceOfFiveOrLarger += other.bytesReadDistanceOfFiveOrLarger; + this.bytesReadErasureCoded += other.bytesReadErasureCoded; } /** @@ -3638,6 +3640,7 @@ public abstract class FileSystem extends Configured implements Closeable { -this.bytesReadDistanceOfThreeOrFour; this.bytesReadDistanceOfFiveOrLarger = -this.bytesReadDistanceOfFiveOrLarger; + this.bytesReadErasureCoded = -this.bytesReadErasureCoded; } @Override @@ -3682,6 +3685,10 @@ public abstract class FileSystem extends Configured implements Closeable { public long getBytesReadDistanceOfFiveOrLarger() { return bytesReadDistanceOfFiveOrLarger; } + + public long getBytesReadErasureCoded() { + return bytesReadErasureCoded; + } } private interface StatisticsAggregator<T> { @@ -3874,6 +3881,14 @@ public abstract class FileSystem extends Configured implements Closeable { } /** + * Increment the bytes read on erasure-coded files in the statistics. + * @param newBytes the additional bytes read + */ + public void incrementBytesReadErasureCoded(long newBytes) { + getThreadStatistics().bytesReadErasureCoded += newBytes; + } + + /** * Increment the bytes read by the network distance in the statistics * In the common network topology setup, distance value should be an even * number such as 0, 2, 4, 6. To make it more general, we group distance @@ -4067,6 +4082,25 @@ public abstract class FileSystem extends Configured implements Closeable { }); } + /** + * Get the total number of bytes read on erasure-coded files. + * @return the number of bytes + */ + public long getBytesReadErasureCoded() { + return visitAll(new StatisticsAggregator<Long>() { + private long bytesReadErasureCoded = 0; + + @Override + public void accept(StatisticsData data) { + bytesReadErasureCoded += data.bytesReadErasureCoded; + } + + public Long aggregate() { + return bytesReadErasureCoded; + } + }); + } + @Override public String toString() { return visitAll(new StatisticsAggregator<String>() { http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java index 8c633f6..43c23ab 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemStorageStatistics.java @@ -46,7 +46,8 @@ public class FileSystemStorageStatistics extends StorageStatistics { "bytesReadLocalHost", "bytesReadDistanceOfOneOrTwo", "bytesReadDistanceOfThreeOrFour", - "bytesReadDistanceOfFiveOrLarger" + "bytesReadDistanceOfFiveOrLarger", + "bytesReadErasureCoded" }; private static class LongStatisticIterator @@ -104,6 +105,8 @@ public class FileSystemStorageStatistics extends StorageStatistics { return data.getBytesReadDistanceOfThreeOrFour(); case "bytesReadDistanceOfFiveOrLarger": return data.getBytesReadDistanceOfFiveOrLarger(); + case "bytesReadErasureCoded": + return data.getBytesReadErasureCoded(); default: return null; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java index 8debb69..597eb93 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileSystemStorageStatistics.java @@ -51,7 +51,8 @@ public class TestFileSystemStorageStatistics { "bytesReadLocalHost", "bytesReadDistanceOfOneOrTwo", "bytesReadDistanceOfThreeOrFour", - "bytesReadDistanceOfFiveOrLarger" + "bytesReadDistanceOfFiveOrLarger", + "bytesReadErasureCoded" }; private FileSystem.Statistics statistics = @@ -74,6 +75,7 @@ public class TestFileSystemStorageStatistics { statistics.incrementBytesReadByDistance(0, RandomUtils.nextInt(100)); statistics.incrementBytesReadByDistance(1, RandomUtils.nextInt(100)); statistics.incrementBytesReadByDistance(3, RandomUtils.nextInt(100)); + statistics.incrementBytesReadErasureCoded(RandomUtils.nextInt(100)); } @Test @@ -126,6 +128,8 @@ public class TestFileSystemStorageStatistics { return statistics.getBytesReadByDistance(3); case "bytesReadDistanceOfFiveOrLarger": return statistics.getBytesReadByDistance(5); + case "bytesReadErasureCoded": + return statistics.getBytesReadErasureCoded(); default: return 0; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 5f1b2bb..96c4505 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -2942,6 +2942,12 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, } } + void updateFileSystemECReadStats(int nRead) { + if (stats != null) { + stats.incrementBytesReadErasureCoded(nRead); + } + } + /** * Create hedged reads thread pool, HEDGED_READ_THREAD_POOL, if * it does not already exist. http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index e250873..4d70fee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -61,6 +61,7 @@ import org.apache.hadoop.fs.StreamCapabilities; import org.apache.hadoop.hdfs.DFSUtilClient.CorruptedBlocks; import org.apache.hadoop.hdfs.client.impl.BlockReaderFactory; import org.apache.hadoop.hdfs.client.impl.DfsClientConf; +import org.apache.hadoop.hdfs.protocol.BlockType; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -1082,6 +1083,9 @@ public class DFSInputStream extends FSInputStream IOUtilsClient.updateReadStatistics(readStatistics, nread, reader); dfsClient.updateFileSystemReadStats( reader.getNetworkDistance(), nread); + if (readStatistics.getBlockType() == BlockType.STRIPED) { + dfsClient.updateFileSystemECReadStats(nread); + } if (nread != len) { throw new IOException("truncated return from reader.read(): " + "excpected " + len + ", got " + nread); http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReaderStrategy.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReaderStrategy.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReaderStrategy.java index c984c3b..39ad2ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReaderStrategy.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReaderStrategy.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs; +import org.apache.hadoop.hdfs.protocol.BlockType; + import java.io.IOException; import java.nio.ByteBuffer; import static org.apache.hadoop.hdfs.util.IOUtilsClient.updateReadStatistics; @@ -121,6 +123,9 @@ class ByteArrayStrategy implements ReaderStrategy { updateReadStatistics(readStatistics, nRead, blockReader); dfsClient.updateFileSystemReadStats(blockReader.getNetworkDistance(), nRead); + if (readStatistics.getBlockType() == BlockType.STRIPED) { + dfsClient.updateFileSystemECReadStats(nRead); + } offset += nRead; } return nRead; @@ -188,6 +193,9 @@ class ByteBufferStrategy implements ReaderStrategy { updateReadStatistics(readStatistics, nRead, blockReader); dfsClient.updateFileSystemReadStats(blockReader.getNetworkDistance(), nRead); + if (readStatistics.getBlockType() == BlockType.STRIPED) { + dfsClient.updateFileSystemECReadStats(nRead); + } } return nRead; http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java index d83a6b0..9b62afc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java @@ -86,6 +86,7 @@ abstract public class Task implements Writable, Configurable { public static String MERGED_OUTPUT_PREFIX = ".merged"; public static final long DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS = 10000; + private static final String HDFS_URI_SCHEME = "hdfs"; /** * @deprecated Provided for compatibility. Use {@link TaskCounter} instead. @@ -1125,7 +1126,8 @@ abstract public class Task implements Writable, Configurable { class FileSystemStatisticUpdater { private List<FileSystem.Statistics> stats; private Counters.Counter readBytesCounter, writeBytesCounter, - readOpsCounter, largeReadOpsCounter, writeOpsCounter; + readOpsCounter, largeReadOpsCounter, writeOpsCounter, + readBytesEcCounter; private String scheme; FileSystemStatisticUpdater(List<FileSystem.Statistics> stats, String scheme) { this.stats = stats; @@ -1153,23 +1155,33 @@ abstract public class Task implements Writable, Configurable { writeOpsCounter = counters.findCounter(scheme, FileSystemCounter.WRITE_OPS); } + if (readBytesEcCounter == null && scheme.equals(HDFS_URI_SCHEME)) { + // EC bytes only applies to hdfs + readBytesEcCounter = + counters.findCounter(scheme, FileSystemCounter.BYTES_READ_EC); + } long readBytes = 0; long writeBytes = 0; long readOps = 0; long largeReadOps = 0; long writeOps = 0; + long readBytesEC = 0; for (FileSystem.Statistics stat: stats) { readBytes = readBytes + stat.getBytesRead(); writeBytes = writeBytes + stat.getBytesWritten(); readOps = readOps + stat.getReadOps(); largeReadOps = largeReadOps + stat.getLargeReadOps(); writeOps = writeOps + stat.getWriteOps(); + readBytesEC = readBytesEC + stat.getBytesReadErasureCoded(); } readBytesCounter.setValue(readBytes); writeBytesCounter.setValue(writeBytes); readOpsCounter.setValue(readOps); largeReadOpsCounter.setValue(largeReadOps); writeOpsCounter.setValue(writeOps); + if (readBytesEcCounter != null) { + readBytesEcCounter.setValue(readBytesEC); + } } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/FileSystemCounter.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/FileSystemCounter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/FileSystemCounter.java index 3624b1a..e27d1dc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/FileSystemCounter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/FileSystemCounter.java @@ -27,4 +27,5 @@ public enum FileSystemCounter { READ_OPS, LARGE_READ_OPS, WRITE_OPS, + BYTES_READ_EC, } http://git-wip-us.apache.org/repos/asf/hadoop/blob/6d5e87ae/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties index 58089af..bc405c8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties @@ -19,3 +19,4 @@ BYTES_WRITTEN.name= Number of bytes written READ_OPS.name= Number of read operations LARGE_READ_OPS.name= Number of large read operations WRITE_OPS.name= Number of write operations +BYTES_READ_EC.name= Number of bytes read erasure-coded --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
