Repository: hadoop Updated Branches: refs/heads/HDFS-8707 c64f61285 -> 4a74bc4fc
HDFS-10761: libhdfs++: Fix broken logic in HA retry policy. Contributed by James Clampffer Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4a74bc4f Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4a74bc4f Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4a74bc4f Branch: refs/heads/HDFS-8707 Commit: 4a74bc4fcbfd5144410cfc58e8926ef4fbbf6efd Parents: c64f612 Author: James <[email protected]> Authored: Mon Aug 22 17:34:59 2016 -0400 Committer: James <[email protected]> Committed: Mon Aug 22 17:34:59 2016 -0400 ---------------------------------------------------------------------- .../native/libhdfspp/lib/common/retry_policy.cc | 11 +++-- .../main/native/libhdfspp/lib/rpc/rpc_engine.cc | 48 +++++++++----------- 2 files changed, 27 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/4a74bc4f/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc index a885d53..ef78aca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/common/retry_policy.cc @@ -56,20 +56,21 @@ RetryAction FixedDelayWithFailover::ShouldRetry(const Status &s, uint64_t retrie (void)isIdempotentOrAtMostOnce; LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry(retries=" << retries << ", failovers=" << failovers << ")"); - if(s.code() == ::asio::error::timed_out && failovers < max_failover_retries_) { + if(failovers < max_failover_retries_ && (s.code() == ::asio::error::timed_out || s.get_server_exception_type() == Status::kStandbyException) ) + { // Try connecting to another NN in case this one keeps timing out // Can add the backoff wait specified by dfs.client.failover.sleep.base.millis here return RetryAction::failover(delay_); } - if(retries < max_retries_) { - LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries < max_retries_"); + if(retries < max_retries_ && failovers < max_failover_retries_) { + LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries < max_retries_ && failovers < max_failover_retries_"); return RetryAction::retry(delay_); } else if (retries >= max_retries_ && failovers < max_failover_retries_) { LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries >= max_retries_ && failovers < max_failover_retries_"); return RetryAction::failover(delay_); - } else if (retries >= max_retries_ && failovers == max_failover_retries_) { - LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries >= max_retries_ && failovers == max_failover_retries_"); + } else if (retries <= max_retries_ && failovers == max_failover_retries_) { + LOG_TRACE(kRPC, << "FixedDelayWithFailover::ShouldRetry: retries <= max_retries_ && failovers == max_failover_retries_"); // 1 last retry on new connection return RetryAction::retry(delay_); } http://git-wip-us.apache.org/repos/asf/hadoop/blob/4a74bc4f/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc index be69d95..89d6a67 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/lib/rpc/rpc_engine.cc @@ -291,39 +291,33 @@ void RpcEngine::RpcCommsError( optional<RetryAction> head_action = optional<RetryAction>(); - //We are talking to the Standby NN, let's talk to the active one instead. - if(ha_persisted_info_ && status.get_server_exception_type() == Status::kStandbyException) { - LOG_INFO(kRPC, << "Received StandbyException. Failing over."); - head_action = RetryAction::failover(std::max(0,options_.rpc_retry_delay_ms)); - } else { - // Filter out anything with too many retries already - for (auto it = pendingRequests.begin(); it < pendingRequests.end();) { - auto req = *it; + // Filter out anything with too many retries already + for (auto it = pendingRequests.begin(); it < pendingRequests.end();) { + auto req = *it; - LOG_DEBUG(kRPC, << req->GetDebugString()); + LOG_DEBUG(kRPC, << req->GetDebugString()); - RetryAction retry = RetryAction::fail(""); // Default to fail + RetryAction retry = RetryAction::fail(""); // Default to fail - if (retry_policy()) { - retry = retry_policy()->ShouldRetry(status, req->IncrementRetryCount(), req->get_failover_count(), true); - } + if (retry_policy()) { + retry = retry_policy()->ShouldRetry(status, req->IncrementRetryCount(), req->get_failover_count(), true); + } - if (retry.action == RetryAction::FAIL) { - // If we've exceeded the maximum retry, take the latest error and pass it - // on. There might be a good argument for caching the first error - // rather than the last one, that gets messy + if (retry.action == RetryAction::FAIL) { + // If we've exceeded the maximum retry, take the latest error and pass it + // on. There might be a good argument for caching the first error + // rather than the last one, that gets messy - io_service().post([req, status]() { - req->OnResponseArrived(nullptr, status); // Never call back while holding a lock - }); - it = pendingRequests.erase(it); - } else { - if (!head_action) { - head_action = retry; - } - - ++it; + io_service().post([req, status]() { + req->OnResponseArrived(nullptr, status); // Never call back while holding a lock + }); + it = pendingRequests.erase(it); + } else { + if (!head_action) { + head_action = retry; } + + ++it; } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
