HyukjinKwon commented on code in PR #46694:
URL: https://github.com/apache/spark/pull/46694#discussion_r1609622792
##########
python/pyspark/sql/connect/plan.py:
##########
@@ -547,14 +550,49 @@ class CachedRemoteRelation(LogicalPlan):
"""Logical plan object for a DataFrame reference which represents a
DataFrame that's been
cached on the server with a given id."""
- def __init__(self, relationId: str):
+ def __init__(self, relation_id: str, spark_session: "SparkSession"):
super().__init__(None)
- self._relationId = relationId
-
- def plan(self, session: "SparkConnectClient") -> proto.Relation:
- plan = self._create_proto_relation()
- plan.cached_remote_relation.relation_id = self._relationId
- return plan
+ self._relation_id = relation_id
+ # Needs to hold the session to make a request itself.
+ self._spark_session = spark_session
+
+ def plan(self, session: "SparkConnectClient") -> proto.Relation:
+ plan = self._create_proto_relation()
+ plan.cached_remote_relation.relation_id = self._relation_id
+ return plan
+
+ def __del__(self) -> None:
+ session = self._spark_session
+ # If session is already closed, all cached DataFrame should be released.
Review Comment:
I don't think so... we can only tell when to release on the client side.
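
For readers following along, here is a minimal, self-contained sketch of the pattern under discussion. `Session`, `is_closed`, `release_cached_relation`, and `CachedRemoteRelationHandle` are hypothetical stand-ins, not the actual Spark Connect API: the point is only that the client is the one place that can observe when a cached-DataFrame handle becomes unreachable, so `__del__` is where the release request has to originate, and it is skipped once the session is closed because the server has already dropped its cached state.

```python
class Session:
    """Hypothetical stand-in for the client-side session (not the real API)."""

    def __init__(self) -> None:
        self.is_closed = False

    def release_cached_relation(self, relation_id: str) -> None:
        # In a real client this would send an RPC asking the server to drop
        # the DataFrame cached under `relation_id`.
        print(f"releasing cached relation {relation_id}")

    def close(self) -> None:
        # Closing the session drops all server-side state at once, so
        # individual releases are no longer needed afterwards.
        self.is_closed = True


class CachedRemoteRelationHandle:
    """Client-side handle to a DataFrame cached on the server by id."""

    def __init__(self, relation_id: str, session: Session) -> None:
        self._relation_id = relation_id
        # Hold the session so the handle can issue its own release request.
        self._session = session

    def __del__(self) -> None:
        # Only the client knows when this handle becomes unreachable, so
        # the release request originates here. If the session was already
        # closed, the server has released everything, so skip the request.
        if not self._session.is_closed:
            self._session.release_cached_relation(self._relation_id)


if __name__ == "__main__":
    session = Session()
    handle = CachedRemoteRelationHandle("relation-1", session)
    del handle  # release request is sent while the session is still open

    other = CachedRemoteRelationHandle("relation-2", session)
    session.close()
    del other   # no request: the closed session already released its state
```

Under this sketch, dropping a handle while the session is open triggers a per-relation release, while dropping it after `close()` sends nothing.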
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.