[ 
https://issues.apache.org/jira/browse/BEAM-8537?focusedWorklogId=371338&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-371338
 ]

ASF GitHub Bot logged work on BEAM-8537:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 14/Jan/20 03:57
            Start Date: 14/Jan/20 03:57
    Worklog Time Spent: 10m 
      Work Description: boyuanzz commented on pull request #10375: [BEAM-8537] 
Provide WatermarkEstimator to track watermark
URL: https://github.com/apache/beam/pull/10375#discussion_r366142878
 
 

 ##########
 File path: sdks/python/apache_beam/runners/common.py
 ##########
 @@ -495,6 +525,162 @@ def invoke_process(self,
         windowed_value, self.process_method(windowed_value.value))
 
 
+class _ThreadsafeWatermarkEstimator(object):
+  """A threadsafe wrapper which wraps a WatermarkEstimator with locking
+  mechanism to guarantee multi-thread safety.
+  """
+  def __init__(self, watermark_estimator, lock):
+    if not isinstance(watermark_estimator, iobase.WatermarkEstimator):
+      raise ValueError('Initializing Threadsafe requires a WatermarkEstimator')
+    self._watermark_estimator = watermark_estimator
+    self._lock = lock
+
+  def __getattr__(self, attr):
+    if hasattr(self._watermark_estimator, attr):
+      def method_wrapper(*args, **kw):
+        with self._lock:
+          return getattr(self._watermark_estimator, attr)(*args, **kw)
+      return method_wrapper
+    raise AttributeError(attr)
+
+  def get_estimator_state(self):
+    # The caller should hold the lock before entering this function.
+    return self._watermark_estimator.get_estimator_state()
+
+  def current_watermark(self):
+    # The caller should hold the lock before entering this function.
+    return self._watermark_estimator.current_watermark()
+
+  def observe_timestamp(self, timestamp):
+    if not isinstance(timestamp, Timestamp):
+      raise ValueError('Input of observe_timestamp should be a Timestamp '
+                       'object')
+    with self._lock:
+      self._watermark_estimator.observe_timestamp(timestamp)
+
+
+class _ThreadsafeRestrictionTracker(object):
+  """A thread-safe wrapper which wraps a `RestrictionTracker`.
+
+  This wrapper guarantees that modifications to the restriction are
+  synchronized across multiple threads.
+  """
+
+  def __init__(self, restriction_tracker, lock):
+    if not isinstance(restriction_tracker, iobase.RestrictionTracker):
+      raise ValueError(
+          'Initialize ThreadsafeRestrictionTracker requires'
+          'RestrictionTracker.')
+    self._restriction_tracker = restriction_tracker
+    # Records an absolute timestamp when defer_remainder is called.
+    self._deferred_timestamp = None
+    self._lock = lock
+    self._deferred_residual = None
+    self._deferred_watermark = None
+
+  def current_restriction(self):
+    with self._lock:
+      return self._restriction_tracker.current_restriction()
+
+  def try_claim(self, position):
+    with self._lock:
+      return self._restriction_tracker.try_claim(position)
+
+  def defer_remainder(self, deferred_time=None):
+    """Performs self-checkpoint on current processing restriction with an
+    expected resuming time.
+
+    Self-checkpoint could happen during processing elements. When executing a
+    DoFn.process(), you may want to stop processing an element and resume
+    later if the current element has been processing for quite a long time or
+    you also want to have some outputs from other elements.
+    ``defer_remainder()`` can be called on a per-element basis if needed.
+
+    Args:
+      deferred_time: A relative ``Duration`` that indicates the ideal time gap
+      between now and resuming, or an absolute ``Timestamp`` for resuming
+      execution time. If deferred_time is None, the deferred work will be
+      executed as soon as possible.
+    """
+
+    # Record current time for calculating deferred_time later.
+    self._deferred_timestamp = Timestamp.now()
+    if (deferred_time and
+        not isinstance(deferred_time, Duration) and
+        not isinstance(deferred_time, Timestamp)):
+      raise ValueError('The timestamp of deter_remainder() should be a '
+                       'Duration or a Timestamp, or None.')
+    self._deferred_watermark = deferred_time
+    checkpoint = self.try_split(0)
+    if checkpoint:
+      _, self._deferred_residual = checkpoint
+
+  def check_done(self):
+    with self._lock:
+      return self._restriction_tracker.check_done()
+
+  def current_progress(self):
+    with self._lock:
+      return self._restriction_tracker.current_progress()
+
+  def try_split(self, fraction_of_remainder):
+    # The caller should hold the lock before entering this function.
 
 Review comment:
   Thanks! Changed it to `raise RuntimeError`
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 371338)
    Time Spent: 5h 40m  (was: 5.5h)

> Provide WatermarkEstimatorProvider for different types of WatermarkEstimator
> ----------------------------------------------------------------------------
>
>                 Key: BEAM-8537
>                 URL: https://issues.apache.org/jira/browse/BEAM-8537
>             Project: Beam
>          Issue Type: Improvement
>          Components: sdk-py-core, sdk-py-harness
>            Reporter: Boyuan Zhang
>            Assignee: Boyuan Zhang
>            Priority: Major
>          Time Spent: 5h 40m
>  Remaining Estimate: 0h
>
> This is a follow-up to the in-progress PR:  
> https://github.com/apache/beam/pull/9794.
> The current implementation in PR 9794 provides a default implementation of 
> WatermarkEstimator. As further work, we want WatermarkEstimator to be 
> a pure interface. We'll provide a WatermarkEstimatorProvider to be able to 
> create a custom WatermarkEstimator per windowed value. It should be similar 
> to how we track restrictions for SDF: 
> WatermarkEstimator <---> RestrictionTracker 
> WatermarkEstimatorProvider <---> RestrictionTrackerProvider
> WatermarkEstimatorParam <---> RestrictionDoFnParam



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to