pabloem commented on a change in pull request #12800:
URL: https://github.com/apache/beam/pull/12800#discussion_r493063549



##########
File path: 
sdks/python/apache_beam/runners/interactive/interactive_environment.py
##########
@@ -348,6 +352,48 @@ def evict_cache_manager(self, pipeline=None):
       return self._cache_managers.pop(str(id(pipeline)), None)
     self._cache_managers.clear()
 
+  def set_recording_manager(self, recording_manager, pipeline):
+    """Sets the recording manager for the given pipeline."""
+    if self.get_recording_manager(pipeline) is recording_manager:
+      # NOOP if setting to the same recording_manager.
+      return
+    self._recording_managers[str(id(pipeline))] = recording_manager
+
+  def get_recording_manager(self, pipeline, create_if_absent=False):
+    """Gets the recording manager for the given pipeline."""
+    recording_manager = self._recording_managers.get(str(id(pipeline)), None)
+    if not recording_manager and create_if_absent:
+      recording_manager = RecordingManager(pipeline)
+      self._recording_managers[str(id(pipeline))] = recording_manager
+    return recording_manager
+
+  def evict_recording_manager(self, pipeline):
+    """Evicts the recording manager for the given pipeline.
+
+    This stops the background caching job and clears the cache.
+    Noop if the pipeline is absent from the environment. If no
+    pipeline is specified, evicts for all pipelines.
+    """
+    if not pipeline:
+      for rm in self._recording_managers.values():
+        rm.cancel()
+        rm.clear()
+      self._recording_managers = {}
+      return
+
+    recording_manager = self.get_recording_manager(pipeline)

Review comment:
       It should also be removed, right? (`del self._recording_managers[id]` or 
something like that?)

##########
File path: sdks/python/apache_beam/runners/interactive/interactive_beam.py
##########
@@ -199,6 +199,93 @@ def display_timezone(self, value):
     self._display_timezone = value
 
 
+class Recordings():
+  """An introspection interface for recordings for pipelines.
+
+  When a user materializes a PCollection onto disk (eg. ib.show) for a streaming
+  pipeline, a background recording job is started. This job pulls data from all
+  defined unbounded sources for that PCollection's pipeline. The following
+  methods allow for introspection into that background recording job.
+  """
+  def describe(self, pipeline=None):
+    # type: (Optional[beam.Pipeline]) -> dict[str, Any]
+
+    """Returns a description of all the recordings for the given pipeline.
+
+    If no pipeline is given then this returns a dictionary of descriptions for
+    all pipelines.
+    """
+
+    watching = ie.current_env().watching()
+    description = ie.current_env().describe_all_recordings()
+
+    # In the case that the user has multiple pipelines, this correlates the
+    # pipeline object to the variable name.

Review comment:
       This function looks somewhat inefficient. There is no need to change 
it if you don't expect it to be heavily used, or if not too many watched 
elements will be iterated by the function, but what do you think?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to