Github user zsxwing commented on a diff in the pull request:
https://github.com/apache/spark/pull/10579#discussion_r48787180
--- Diff: python/pyspark/context.py ---
@@ -54,6 +54,63 @@
}
+class Py4jCallbackConnectionCleaner(object):
+
+ """
+ A cleaner to clean up callback connections that are not closed by Py4j. See SPARK-12617.
+ It will scan all callback connections every 30 seconds and close the dead connections.
+ """
+
+ def __init__(self, gateway):
+ self._gateway = gateway
+ self._stopped = False
+ self._timer = None
+ self._lock = RLock()
+
+ def start(self):
+ if self._stopped:
+ return
+
+ def clean_closed_connections():
+ from py4j.java_gateway import quiet_close, quiet_shutdown
+
+ callback_server = self._gateway._callback_server
+ with callback_server.lock:
+ try:
+ closed_connections = []
+ for connection in callback_server.connections:
+ if not connection.isAlive():
+ quiet_close(connection.input)
+ quiet_shutdown(connection.socket)
+ quiet_close(connection.socket)
+ closed_connections.append(connection)
+
+ for closed_connection in closed_connections:
+ callback_server.connections.remove(closed_connection)
+ except Exception:
+ import traceback
+ traceback.print_exc()
+
+ self._start_timer(clean_closed_connections)
+
+ self._start_timer(clean_closed_connections)
+
+ def _start_timer(self, f):
+ from threading import Timer
+
+ with self._lock:
+ if not self._stopped:
+ self._timer = Timer(30.0, f)
+ self._timer.start()
+
+ def stop(self):
--- End diff --
This `stop` method is never called, because PySpark never stops the gateway server.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like to enable it, or if the feature is enabled but not
working, please contact infrastructure at [email protected] or file a JIRA
ticket with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]