zhengruifeng commented on code in PR #46694:
URL: https://github.com/apache/spark/pull/46694#discussion_r1609578841


##########
python/pyspark/sql/connect/plan.py:
##########
@@ -547,14 +550,49 @@ class CachedRemoteRelation(LogicalPlan):
     """Logical plan object for a DataFrame reference which represents a 
DataFrame that's been
     cached on the server with a given id."""
 
-    def __init__(self, relationId: str):
+    def __init__(self, relation_id: str, spark_session: "SparkSession"):
         super().__init__(None)
-        self._relationId = relationId
-
-    def plan(self, session: "SparkConnectClient") -> proto.Relation:
-        plan = self._create_proto_relation()
-        plan.cached_remote_relation.relation_id = self._relationId
-        return plan
+        self._relation_id = relation_id
+        # Needs to hold the session to make a request itself.
+        self._spark_session = spark_session
+
+    def plan(self, session: "SparkConnectClient") -> proto.Relation:
+        plan = self._create_proto_relation()
+        plan.cached_remote_relation.relation_id = self._relation_id
+        return plan
+
+    def __del__(self) -> None:
+        session = self._spark_session
+        # If session is already closed, all cached DataFrame should be 
released.

Review Comment:
   is it possible to release those cached dataframes in server side?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to