This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a94648c7621 [fix](rpc) robust retry and exception handling for
MetaService RPC (#53670)
a94648c7621 is described below
commit a94648c762150c0836d23f3ffabb9e2eb076aa44
Author: hui lai <[email protected]>
AuthorDate: Tue Jul 22 22:34:02 2025 +0800
[fix](rpc) robust retry and exception handling for MetaService RPC (#53670)
- Ensure correct retry behavior for gRPC StatusRuntimeException:
- Only retry on UNAVAILABLE, UNKNOWN, or DEADLINE_EXCEEDED (a limited
number of times).
- Throw RpcException after all retries are exhausted or on non-retryable
errors.
- Move resource cleanup (client.shutdown) to finally block for better
reliability.
- Correctly handle InterruptedException rather than ignoring it.
- Improve log messages for clarity and fix a typo ("servive" ->
"service").
- Remove unreachable return null, always throw RpcException if all
retries fail.
---
.../apache/doris/cloud/rpc/MetaServiceProxy.java | 46 ++++++++++++----------
1 file changed, 25 insertions(+), 21 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java
index 95753821c35..dc6dc11d518 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java
@@ -22,7 +22,6 @@ import org.apache.doris.common.Config;
import org.apache.doris.rpc.RpcException;
import com.google.common.collect.Maps;
-import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -176,46 +175,51 @@ public class MetaServiceProxy {
}
public <Response> Response executeRequest(Function<MetaServiceClient,
Response> function) throws RpcException {
- int tried = 0;
- while (tried++ < Config.meta_service_rpc_retry_cnt) {
+ long maxRetries = Config.meta_service_rpc_retry_cnt;
+ for (long tried = 1; tried <= maxRetries; tried++) {
MetaServiceClient client = null;
try {
client = proxy.getProxy();
return function.apply(client);
} catch (StatusRuntimeException sre) {
- LOG.info("failed to request meta servive code {}, msg {},
trycnt {}", sre.getStatus().getCode(),
+ LOG.warn("failed to request meta service code {}, msg {},
trycnt {}", sre.getStatus().getCode(),
sre.getMessage(), tried);
- if ((tried > Config.meta_service_rpc_retry_cnt
- || (sre.getStatus().getCode() !=
Status.Code.UNAVAILABLE
- && sre.getStatus().getCode() !=
Status.Code.UNKNOWN))
- && (tried >
Config.meta_service_rpc_timeout_retry_times
- || sre.getStatus().getCode() !=
Status.Code.DEADLINE_EXCEEDED)) {
+ boolean shouldRetry = false;
+ switch (sre.getStatus().getCode()) {
+ case UNAVAILABLE:
+ case UNKNOWN:
+ shouldRetry = true;
+ break;
+ case DEADLINE_EXCEEDED:
+ shouldRetry = tried <=
Config.meta_service_rpc_timeout_retry_times;
+ break;
+ default:
+ shouldRetry = false;
+ }
+ if (!shouldRetry || tried >= maxRetries) {
throw new RpcException("", sre.getMessage(), sre);
}
} catch (Exception e) {
- LOG.info("failed to request meta servive trycnt {}",
tried, e);
- if (tried > Config.meta_service_rpc_retry_cnt) {
+ LOG.warn("failed to request meta servive trycnt {}",
tried, e);
+ if (tried >= maxRetries) {
throw new RpcException("", e.getMessage(), e);
}
- } catch (Throwable t) {
- LOG.info("failed to request meta servive trycnt {}",
tried, t);
- if (tried > Config.meta_service_rpc_retry_cnt) {
- throw new RpcException("", t.getMessage());
+ } finally {
+ if (proxy.needReconn() && client != null) {
+ client.shutdown(true);
}
}
- if (proxy.needReconn() && client != null) {
- client.shutdown(true);
- }
-
int delay = 20 + random.nextInt(200 - 20 + 1);
try {
Thread.sleep(delay);
} catch (InterruptedException interruptedException) {
- // ignore
+ Thread.currentThread().interrupt();
+ throw new RpcException("",
interruptedException.getMessage(), interruptedException);
}
}
- return null; // impossible and unreachable, just make the compiler
happy
+ // impossible and unreachable, just make the compiler happy
+ throw new RpcException("", "All retries exhausted", null);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]