ueshin commented on a change in pull request #34731:
URL: https://github.com/apache/spark/pull/34731#discussion_r758859631



##########
File path: python/pyspark/profiler.py
##########
@@ -31,54 +36,51 @@ class ProfilerCollector(object):
     the different stages.
     """
 
-    def __init__(self, profiler_cls, dump_path=None):
-        self.profiler_cls = profiler_cls
-        self.profile_dump_path = dump_path
-        self.profilers = []
+    def __init__(self, profiler_cls: Type["Profiler"], dump_path: Optional[str] = None):
+        self.profiler_cls: Type[Profiler] = profiler_cls
+        self.profile_dump_path: Optional[str] = dump_path
+        self.profilers: List[Tuple[int, Profiler, bool]] = []

Review comment:
       May I ask why?

##########
File path: python/pyspark/profiler.py
##########
@@ -31,54 +36,51 @@ class ProfilerCollector(object):
     the different stages.
     """
 
-    def __init__(self, profiler_cls, dump_path=None):
-        self.profiler_cls = profiler_cls
-        self.profile_dump_path = dump_path
-        self.profilers = []
+    def __init__(self, profiler_cls: Type["Profiler"], dump_path: Optional[str] = None):
+        self.profiler_cls: Type[Profiler] = profiler_cls
+        self.profile_dump_path: Optional[str] = dump_path
+        self.profilers: List[Tuple[int, Profiler, bool]] = []
 
-    def new_profiler(self, ctx):
+    def new_profiler(self, ctx: "SparkContext") -> "Profiler":
         """Create a new profiler using class `profiler_cls`"""
         return self.profiler_cls(ctx)
 
-    def add_profiler(self, id, profiler):
+    def add_profiler(self, id: int, profiler: "Profiler") -> None:
         """Add a profiler for RDD `id`"""
         if not self.profilers:
             if self.profile_dump_path:
                 atexit.register(self.dump_profiles, self.profile_dump_path)
             else:
                 atexit.register(self.show_profiles)
 
-        self.profilers.append([id, profiler, False])
+        self.profilers.append([id, profiler, False])  # type: ignore[arg-type]
 
-    def dump_profiles(self, path):
+    def dump_profiles(self, path: str) -> None:
         """Dump the profile stats into directory `path`"""
         for id, profiler, _ in self.profilers:
             profiler.dump(id, path)
         self.profilers = []
 
-    def show_profiles(self):
+    def show_profiles(self) -> None:
         """Print the profile stats to stdout"""
         for i, (id, profiler, showed) in enumerate(self.profilers):
             if not showed and profiler:
                 profiler.show(id)
                 # mark it as showed
-                self.profilers[i][2] = True
+                self.profilers[i][2] = True  # type: ignore[index]
 
 
 class Profiler(object):
     """
     PySpark supports custom profilers, this is to allow for different profilers to
     be used as well as outputting to different formats than what is provided in the
     BasicProfiler.
-

Review comment:
       Could you revert these changes?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to