ueshin commented on code in PR #45073:
URL: https://github.com/apache/spark/pull/45073#discussion_r1486900195


##########
python/pyspark/sql/profiler.py:
##########
@@ -158,6 +159,70 @@ def _profile_results(self) -> "ProfileResults":
         """
         ...
 
+    def dump_perf_profiles(self, path: str, id: Optional[int] = None) -> None:
+        """
+        Dump the perf profile results into directory `path`.
+
+        .. versionadded:: 4.0.0
+
+        Parameters
+        ----------
+        path: str
+            A directory in which to dump the perf profile.
+        id : int, optional
+            A UDF ID to be shown. If not specified, all the results will be 
shown.
+        """
+        with self._lock:
+            stats = self._perf_profile_results
+
+        def dump(path: str, id: int) -> None:
+            s = stats.get(id)
+
+            if s is not None:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                p = os.path.join(path, "udf_%d.pstats" % id)
+                s.dump_stats(p)
+
+        if id is not None:
+            dump(path, id)
+        else:
+            for id in sorted(stats.keys()):
+                dump(path, id)
+
+    def dump_memory_profiles(self, path: str, id: Optional[int] = None) -> 
None:
+        """
+        Dump the memory profile results into directory `path`.
+
+        .. versionadded:: 4.0.0
+
+        Parameters
+        ----------
+        path: str
+            A directory in which to dump the memory profile.
+        id : int, optional
+            A UDF ID to be shown. If not specified, all the results will be 
shown.
+        """
+        with self._lock:
+            code_map = self._memory_profile_results
+
+        def dump(path: str, id: int) -> None:
+            cm = code_map.get(id)
+
+            if cm is not None:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                p = os.path.join(path, "udf_%d_memory.txt" % id)

Review Comment:
   ditto: how about using an f-string here as well, e.g. `f"udf_{id}_memory.txt"`?



##########
python/pyspark/sql/profiler.py:
##########
@@ -158,6 +159,70 @@ def _profile_results(self) -> "ProfileResults":
         """
         ...
 
+    def dump_perf_profiles(self, path: str, id: Optional[int] = None) -> None:
+        """
+        Dump the perf profile results into directory `path`.
+
+        .. versionadded:: 4.0.0
+
+        Parameters
+        ----------
+        path: str
+            A directory in which to dump the perf profile.
+        id : int, optional
+            A UDF ID to be shown. If not specified, all the results will be 
shown.
+        """
+        with self._lock:
+            stats = self._perf_profile_results
+
+        def dump(path: str, id: int) -> None:

Review Comment:
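   nit: the `path` parameter is not necessary for this internal function? It is nested inside the method and is always called with the outer `path`, so it can use that variable directly.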



##########
python/pyspark/sql/profiler.py:
##########
@@ -158,6 +159,70 @@ def _profile_results(self) -> "ProfileResults":
         """
         ...
 
+    def dump_perf_profiles(self, path: str, id: Optional[int] = None) -> None:
+        """
+        Dump the perf profile results into directory `path`.
+
+        .. versionadded:: 4.0.0
+
+        Parameters
+        ----------
+        path: str
+            A directory in which to dump the perf profile.
+        id : int, optional
+            A UDF ID to be shown. If not specified, all the results will be 
shown.
+        """
+        with self._lock:
+            stats = self._perf_profile_results
+
+        def dump(path: str, id: int) -> None:
+            s = stats.get(id)
+
+            if s is not None:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                p = os.path.join(path, "udf_%d.pstats" % id)

Review Comment:
   How about naming the file `udf_%d_perf.pstats` instead?



##########
python/pyspark/sql/profiler.py:
##########
@@ -158,6 +159,70 @@ def _profile_results(self) -> "ProfileResults":
         """
         ...
 
+    def dump_perf_profiles(self, path: str, id: Optional[int] = None) -> None:
+        """
+        Dump the perf profile results into directory `path`.
+
+        .. versionadded:: 4.0.0
+
+        Parameters
+        ----------
+        path: str
+            A directory in which to dump the perf profile.
+        id : int, optional
+            A UDF ID to be shown. If not specified, all the results will be 
shown.
+        """
+        with self._lock:
+            stats = self._perf_profile_results
+
+        def dump(path: str, id: int) -> None:
+            s = stats.get(id)
+
+            if s is not None:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                p = os.path.join(path, "udf_%d.pstats" % id)

Review Comment:
   btw, how about using an f-string instead of `%`-formatting: `f"udf_{id}_perf.pstats"`?



##########
python/pyspark/sql/profiler.py:
##########
@@ -158,6 +159,70 @@ def _profile_results(self) -> "ProfileResults":
         """
         ...
 
+    def dump_perf_profiles(self, path: str, id: Optional[int] = None) -> None:
+        """
+        Dump the perf profile results into directory `path`.
+
+        .. versionadded:: 4.0.0
+
+        Parameters
+        ----------
+        path: str
+            A directory in which to dump the perf profile.
+        id : int, optional
+            A UDF ID to be shown. If not specified, all the results will be 
shown.
+        """
+        with self._lock:
+            stats = self._perf_profile_results
+
+        def dump(path: str, id: int) -> None:
+            s = stats.get(id)
+
+            if s is not None:
+                if not os.path.exists(path):
+                    os.makedirs(path)
+                p = os.path.join(path, "udf_%d.pstats" % id)
+                s.dump_stats(p)
+
+        if id is not None:
+            dump(path, id)
+        else:
+            for id in sorted(stats.keys()):
+                dump(path, id)
+
+    def dump_memory_profiles(self, path: str, id: Optional[int] = None) -> 
None:
+        """
+        Dump the memory profile results into directory `path`.
+
+        .. versionadded:: 4.0.0
+
+        Parameters
+        ----------
+        path: str
+            A directory in which to dump the memory profile.
+        id : int, optional
+            A UDF ID to be shown. If not specified, all the results will be 
shown.
+        """
+        with self._lock:
+            code_map = self._memory_profile_results
+
+        def dump(path: str, id: int) -> None:

Review Comment:
   ditto: the `path` parameter is not necessary for this internal function either?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to