Repository: incubator-impala Updated Branches: refs/heads/master 0ee6d19d5 -> 889494004
IMPALA-4740: Add option to use hdfsPread() for HDFS hedged reads In order to use HDFS hedged reads, the hdfsPread() API must be used instead of the hdfsRead() call. Adds a flag to use hdfsPread(): --use_hdfs_pread Testing: * Ran existing tests with this flag enabled. * Cluster-tested with HDFS hedged reads enabled via the HDFS client config. * Manually tested by setting the 'max_chunk_size' to a small value to force multiple iterations of the while loop, which would normally only happen on S3. Verified that reading lineitem succeeded. Change-Id: Iecc8b12aa20cbfe08f4ef6a08a191e49709d9525 Reviewed-on: http://gerrit.cloudera.org:8080/5635 Reviewed-by: Matthew Jacobs <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/88949400 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/88949400 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/88949400 Branch: refs/heads/master Commit: 889494004e429a7dab4e693e737ca2a6dd7799b9 Parents: 0ee6d19 Author: Matthew Jacobs <[email protected]> Authored: Fri Jan 6 14:49:31 2017 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Mon Jan 9 19:16:58 2017 +0000 ---------------------------------------------------------------------- be/src/runtime/disk-io-mgr-scan-range.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/88949400/be/src/runtime/disk-io-mgr-scan-range.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/disk-io-mgr-scan-range.cc b/be/src/runtime/disk-io-mgr-scan-range.cc index bc676bf..5111b70 100644 --- a/be/src/runtime/disk-io-mgr-scan-range.cc +++ b/be/src/runtime/disk-io-mgr-scan-range.cc @@ -29,6 +29,10 @@ using namespace impala; const int 
MAX_QUEUE_CAPACITY = 128; const int MIN_QUEUE_CAPACITY = 2; +DEFINE_bool(use_hdfs_pread, false, "Enables using hdfsPread() instead of hdfsRead() " + "when performing HDFS read operations. This is necessary to use HDFS hedged reads " + "(assuming the HDFS client is configured to do so)."); + // Implementation of the ScanRange functionality. Each ScanRange contains a queue // of ready buffers. For each ScanRange, there is only a single producer and // consumer thread, i.e. only one disk thread will push to a scan range at @@ -397,7 +401,15 @@ Status DiskIoMgr::ScanRange::Read( DCHECK_GE(chunk_size, 0); // The hdfsRead() length argument is an int. DCHECK_LE(chunk_size, numeric_limits<int>::max()); - int last_read = hdfsRead(fs_, hdfs_file_->file(), buffer + *bytes_read, chunk_size); + int last_read = -1; + if (FLAGS_use_hdfs_pread) { + // bytes_read_ is only updated after the while loop + int64_t position_in_file = offset_ + bytes_read_ + *bytes_read; + last_read = hdfsPread(fs_, hdfs_file_->file(), position_in_file, + buffer + *bytes_read, chunk_size); + } else { + last_read = hdfsRead(fs_, hdfs_file_->file(), buffer + *bytes_read, chunk_size); + } if (last_read == -1) { return Status(GetHdfsErrorMsg("Error reading from HDFS file: ", file_)); } else if (last_read == 0) {
