This is an automated email from the ASF dual-hosted git repository. boyuanz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
     new f2f0282  Update docstring of ManualWatermarkEstimator.set_watermark()
     new 7139216  Merge pull request #10933 from boyuanzz/follow_up
f2f0282 is described below

commit f2f0282719194f31557d628d0fb1fc6a44d72c53
Author: Boyuan Zhang <boyu...@google.com>
AuthorDate: Fri Feb 21 15:17:41 2020 -0800

    Update docstring of ManualWatermarkEstimator.set_watermark()
---
 sdks/python/apache_beam/io/watermark_estimators.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/sdks/python/apache_beam/io/watermark_estimators.py b/sdks/python/apache_beam/io/watermark_estimators.py
index 5a00ae7..65c4611 100644
--- a/sdks/python/apache_beam/io/watermark_estimators.py
+++ b/sdks/python/apache_beam/io/watermark_estimators.py
@@ -120,10 +120,19 @@ class ManualWatermarkEstimator(WatermarkEstimator):
     return self._watermark

   def set_watermark(self, timestamp):
-    # Please call set_watermark after calling restriction_tracker.try_claim() to
-    # prevent advancing watermark early.
-    # TODO(BEAM-7473): It's possible that getting a slightly stale watermark
-    # when performing split.
+    # pylint: disable=line-too-long
+
+    """Sets a timestamp before or at the timestamps of all future elements
+    produced by the associated DoFn.
+
+    This can be approximate. If records are output that violate this guarantee,
+    they will be considered late, which will affect how they will be processed.
+    See https://beam.apache.org/documentation/programming-guide/#watermarks-and-late-data
+    for more information on late data and how to handle it.
+
+    However, this value should be as late as possible. Downstream windows may
+    not be able to close until this watermark passes their end.
+    """
     if not isinstance(timestamp, Timestamp):
       raise ValueError('set_watermark expects a Timestamp as input')
     if self._watermark and self._watermark > timestamp: