grundprinzip commented on code in PR #42818:
URL: https://github.com/apache/spark/pull/42818#discussion_r1316884962
##########
python/pyspark/sql/connect/client/core.py:
##########
@@ -585,11 +585,39 @@ class SparkConnectClient(object):
@classmethod
def retry_exception(cls, e: Exception) -> bool:
- if isinstance(e, grpc.RpcError):
- return e.code() == grpc.StatusCode.UNAVAILABLE
- else:
+ """
+ Helper function that is used to identify if an exception thrown by the
server
+ can be retried or not.
+
+ Parameters
+ ----------
+ e : Exception
+ The GRPC error as received from the server. Typed as Exception,
because other exceptions
+ thrown during client processing can be passed here as well.
+
+ Returns
+ -------
+ True if the exception can be retried, False otherwise.
+
+ """
+ if not isinstance(e, grpc.RpcError):
return False
+ if e.code() in [
+ grpc.StatusCode.INTERNAL
+ ]:
+ # This error happens if another RPC preempts this RPC, retry.
+ msg_cursor_disconnected = "INVALID_CURSOR.DISCONNECTED"
+
+ msg = str(e)
+ if any(map(lambda x: x in msg, [msg_cursor_disconnected])):
+ return True
Review Comment:
We have some logic to convert the error messages from arbitrary RPC errors into
Spark-understandable exceptions. I'm wondering if we should leverage that logic
here as well: pyspark.errors.exceptions.connect.convert()
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]