This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b8a8e0d526e0 [MINOR][PYTHON] Leverage functools.cached_property in `SparkSession`
b8a8e0d526e0 is described below

commit b8a8e0d526e0f268faf314dfb4308ab77dd1bad8
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Dec 27 13:13:44 2024 +0900

    [MINOR][PYTHON] Leverage functools.cached_property in `SparkSession`
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to replace the manually cached properties with `functools.cached_property` in `SparkSession`.
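    
    As a minimal sketch of the pattern (using a hypothetical `Service` class for illustration, not Spark's actual code), `functools.cached_property` runs the getter once on first access and stores the result in the instance's `__dict__`, so later lookups bypass the getter entirely:
    
    ```python
    import functools
    
    class Service:
        @functools.cached_property
        def catalog(self) -> dict:
            # Runs only on the first access; the result is then stored in
            # self.__dict__["catalog"], so this body is never re-entered.
            print("building catalog")
            return {"tables": []}
    
    s = Service()
    s.catalog  # prints "building catalog" and caches the result
    s.catalog  # served straight from the instance __dict__
    ```
    
    This replaces the manual `if not hasattr(self, "_catalog"): ...` bookkeeping shown in the diff below.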
    
    ### Why are the changes needed?
    
    To remove hand-rolled property-caching boilerplate.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #49306 from HyukjinKwon/use-cached-property.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/sql/connect/session.py |  6 ++----
 python/pyspark/sql/session.py         | 23 ++++++++---------------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index 7c40f1dd54a2..925eaaeabf60 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -790,13 +790,11 @@ class SparkSession:
 
     range.__doc__ = PySparkSession.range.__doc__
 
-    @property
+    @functools.cached_property
     def catalog(self) -> "Catalog":
         from pyspark.sql.connect.catalog import Catalog
 
-        if not hasattr(self, "_catalog"):
-            self._catalog = Catalog(self)
-        return self._catalog
+        return Catalog(self)
 
     catalog.__doc__ = PySparkSession.catalog.__doc__
 
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 1ec85e52bbcd..00fa60442b41 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -18,7 +18,7 @@ import os
 import sys
 import warnings
 from collections.abc import Sized
-from functools import reduce
+from functools import reduce, cached_property
 from threading import RLock
 from types import TracebackType
 from typing import (
@@ -773,7 +773,7 @@ class SparkSession(SparkConversionMixin):
             """
             return self._sc
 
-    @property
+    @cached_property
     def version(self) -> str:
         """
         The version of Spark on which this application is running.
@@ -794,7 +794,7 @@ class SparkSession(SparkConversionMixin):
         """
         return self._jsparkSession.version()
 
-    @property
+    @cached_property
     def conf(self) -> RuntimeConfig:
         """Runtime configuration interface for Spark.
 
@@ -822,11 +822,9 @@ class SparkSession(SparkConversionMixin):
         >>> spark.conf.get("key")
         'value'
         """
-        if not hasattr(self, "_conf"):
-            self._conf = RuntimeConfig(self._jsparkSession.conf())
-        return self._conf
+        return RuntimeConfig(self._jsparkSession.conf())
 
-    @property
+    @cached_property
     def catalog(self) -> "Catalog":
         """Interface through which the user may create, drop, alter or query 
underlying
         databases, tables, functions, etc.
@@ -854,9 +852,7 @@ class SparkSession(SparkConversionMixin):
         """
         from pyspark.sql.catalog import Catalog
 
-        if not hasattr(self, "_catalog"):
-            self._catalog = Catalog(self)
-        return self._catalog
+        return Catalog(self)
 
     @property
     def udf(self) -> "UDFRegistration":
@@ -1907,7 +1903,7 @@ class SparkSession(SparkConversionMixin):
         """
         return DataStreamReader(self)
 
-    @property
+    @cached_property
     def streams(self) -> "StreamingQueryManager":
         """Returns a :class:`StreamingQueryManager` that allows managing all 
the
         :class:`StreamingQuery` instances active on `this` context.
@@ -1941,10 +1937,7 @@ class SparkSession(SparkConversionMixin):
         """
         from pyspark.sql.streaming import StreamingQueryManager
 
-        if hasattr(self, "_sqm"):
-            return self._sqm
-        self._sqm: StreamingQueryManager = StreamingQueryManager(self._jsparkSession.streams())
-        return self._sqm
+        return StreamingQueryManager(self._jsparkSession.streams())
 
     @property
     def tvf(self) -> "TableValuedFunction":

