This is an automated email from the ASF dual-hosted git repository. stevel pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit e1842b2a749d79cbdc15c524515b9eda64c339d5 Merge: e6ecc4f3e44 4d1f6f9b995 Author: Steve Loughran <[email protected]> AuthorDate: Wed Jun 22 17:33:40 2022 +0100 HADOOP-18103. Add a high-performance vectored read API. (#4476) This feature adds methods for ranged vectored read operations in PositionedReadable. All streams which implement that interface support the new API. The default implementation reads each range in the vector sequentially. However, specific implementations may provide higher performance versions. This is done in two places: (1) Local FileSystem/Checksum FileSystem; (2) the S3A client. The S3A client first coalesces adjacent and "nearby" ranges together, then fetches each range in separate HTTP GET requests, executed in parallel. As such it delivers significant speedups to applications reading separate blocks of data from the same file, columnar data format libraries in particular. This is the merge commit of the feature branch; the work is in: HADOOP-11867. Add a high-performance vectored read API. HADOOP-18104. S3A: Add configs to configure minSeekForVectorReads and maxReadSizeForVectorReads. HADOOP-18107. Adding scale test for vectored reads for large file. HADOOP-18105. Implement buffer pooling with weak references. HADOOP-18106. Handle memory fragmentation in S3A Vectored IO. 
Contributed By: Owen O'Malley and Mukund Thakur dev-support/Jenkinsfile | 2 +- .../apache/hadoop/fs/BufferedFSInputStream.java | 27 +- .../org/apache/hadoop/fs/ChecksumFileSystem.java | 213 +++++++++-- .../org/apache/hadoop/fs/FSDataInputStream.java | 22 +- .../main/java/org/apache/hadoop/fs/FileRange.java | 67 ++++ .../org/apache/hadoop/fs/PositionedReadable.java | 41 ++- .../org/apache/hadoop/fs/RawLocalFileSystem.java | 110 +++++- .../org/apache/hadoop/fs/StreamCapabilities.java | 6 + .../org/apache/hadoop/fs/VectoredReadUtils.java | 292 +++++++++++++++ .../apache/hadoop/fs/impl/CombinedFileRange.java | 70 ++++ .../org/apache/hadoop/fs/impl/FileRangeImpl.java | 74 ++++ .../java/org/apache/hadoop/io/ByteBufferPool.java | 5 + .../apache/hadoop/io/ElasticByteBufferPool.java | 4 +- .../io/WeakReferencedElasticByteBufferPool.java | 155 ++++++++ .../site/markdown/filesystem/fsdatainputstream.md | 39 ++ .../apache/hadoop/fs/TestVectoredReadUtils.java | 371 +++++++++++++++++++ .../contract/AbstractContractVectoredReadTest.java | 406 +++++++++++++++++++++ .../hadoop/fs/contract/ContractTestUtils.java | 84 +++++ .../localfs/TestLocalFSContractVectoredRead.java | 86 +++++ .../rawlocal/TestRawLocalContractVectoredRead.java | 35 ++ ...estMoreWeakReferencedElasticByteBufferPool.java | 97 +++++ .../TestWeakReferencedElasticByteBufferPool.java | 232 ++++++++++++ .../java/org/apache/hadoop/test/MoreAsserts.java | 49 ++- hadoop-common-project/pom.xml | 1 - hadoop-project/pom.xml | 11 + .../java/org/apache/hadoop/fs/s3a/Constants.java | 26 ++ .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 39 +- .../org/apache/hadoop/fs/s3a/S3AInputStream.java | 391 +++++++++++++++++++- .../org/apache/hadoop/fs/s3a/S3AReadOpContext.java | 20 +- .../apache/hadoop/fs/s3a/VectoredIOContext.java | 78 ++++ .../fs/s3a/impl/GetContentSummaryOperation.java | 3 +- .../site/markdown/tools/hadoop-aws/performance.md | 30 ++ .../contract/s3a/ITestS3AContractVectoredRead.java | 159 ++++++++ 
.../hadoop/fs/s3a/TestS3AInputStreamRetry.java | 3 +- .../fs/s3a/scale/AbstractSTestS3AHugeFiles.java | 32 ++ .../hadoop-aws/src/test/resources/log4j.properties | 2 +- hadoop-tools/hadoop-benchmark/pom.xml | 94 +++++ .../hadoop-benchmark/src/main/assembly/uber.xml | 33 ++ .../hadoop-benchmark/src/main/findbugs/exclude.xml | 22 ++ .../hadoop/benchmark/VectoredReadBenchmark.java | 245 +++++++++++++ .../org/apache/hadoop/benchmark/package-info.java | 22 ++ hadoop-tools/pom.xml | 1 + pom.xml | 1 + 43 files changed, 3603 insertions(+), 97 deletions(-) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
