Repository: beam Updated Branches: refs/heads/master 926157d42 -> 9b5a8e62c
Moving the data file for trigger tests to testing/data Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/911128b2 Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/911128b2 Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/911128b2 Branch: refs/heads/master Commit: 911128b287909e2e2240e0674223df7c66e952f8 Parents: 926157d Author: Ahmet Altay <[email protected]> Authored: Mon May 15 09:59:29 2017 -0700 Committer: Ahmet Altay <[email protected]> Committed: Mon May 15 13:34:06 2017 -0700 ---------------------------------------------------------------------- .../testing/data/trigger_transcripts.yaml | 224 +++++++++++++++++++ .../apache_beam/transforms/trigger_test.py | 5 +- .../transforms/trigger_transcripts.yaml | 224 ------------------- 3 files changed, 227 insertions(+), 226 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/911128b2/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml b/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml new file mode 100644 index 0000000..a736e94 --- /dev/null +++ b/sdks/python/apache_beam/testing/data/trigger_transcripts.yaml @@ -0,0 +1,224 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: fixed_default +window_fn: FixedWindows(10) # Python names/syntax, unless otherwise noted. +trigger_fn: Default # Same. Empty () may be omitted. +transcript: # Ordered list of events. + - input: [1, 2, 3, 10, 11] # The elements are the timestamps. + - watermark: 25 + - expect: # Every expected output from the last action. + - {window: [0, 9], values: [1, 2, 3]} + - {window: [10, 19], values: [10, 11]} # Partial match on attributes OK. + +--- +name: fixed_default_late_data +window_fn: FixedWindows(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_EOW +transcript: + - input: [1, 2, 3, 10, 11, 25] + - watermark: 100 + - expect: + - {window: [0, 9], values: [1, 2, 3], timestamp: 10, final: false} + - {window: [10, 19], values: [10, 11], timestamp: 20} + - {window: [20, 29], values: [25], timestamp: 30, late: false} + - input: [7] + - expect: + - {window: [0, 9], values: [1, 2, 3, 7], timestamp: 10, late: true} + +--- +name: timestamp_combiner_earliest +window_fn: FixedWindows(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_EARLIEST +transcript: + - input: [1, 2, 3, 10, 11, 25] + - watermark: 100 + - expect: + - {window: [0, 9], values: [1, 2, 3], timestamp: 1, final: false} + - {window: [10, 19], values: [10, 11], timestamp: 10} + - {window: [20, 29], values: [25], timestamp: 25, late: false} + +--- +name: timestamp_combiner_latest +window_fn: FixedWindows(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_LATEST +transcript: + - input: [1, 2, 3, 10, 11, 25] + - watermark: 100 + - expect: + - {window: [0, 9], values: [1, 2, 3], timestamp: 3, final: false} + - {window: [10, 19], values: [10, 11], timestamp: 11} + - {window: [20, 29], values: [25], timestamp: 25, late: false} + +--- +# Test that custom timestamping is not invoked. +name: timestamp_combiner_custom_timestamping_eow +window_fn: CustomTimestampingFixedWindowsWindowFn(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_EOW +transcript: + - input: [1, 2, 3, 10, 11, 25] + - watermark: 100 + - expect: + - {window: [0, 9], values: [1, 2, 3], timestamp: 10, final: false} + - {window: [10, 19], values: [10, 11], timestamp: 20} + - {window: [20, 29], values: [25], timestamp: 30, late: false} + +--- +# Test that custom timestamping is not invoked. +name: timestamp_combiner_custom_timestamping_earliest +window_fn: CustomTimestampingFixedWindowsWindowFn(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_EARLIEST +transcript: + - input: [1, 2, 3, 10, 11, 25] + - watermark: 100 + - expect: + - {window: [0, 9], values: [1, 2, 3], timestamp: 1, final: false} + - {window: [10, 19], values: [10, 11], timestamp: 10} + - {window: [20, 29], values: [25], timestamp: 25, late: false} + +--- +# Test that custom timestamping is in fact invoked. +name: timestamp_combiner_custom_timestamping_earliest +window_fn: CustomTimestampingFixedWindowsWindowFn(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_EARLIEST_TRANSFORMED +transcript: + - input: [1, 2, 3, 10, 11, 25] + - watermark: 100 + - expect: + - {window: [0, 9], values: [1, 2, 3], timestamp: 101, final: false} + - {window: [10, 19], values: [10, 11], timestamp: 110} + - {window: [20, 29], values: [25], timestamp: 125, late: false} + +--- +name: early_late_sessions +window_fn: Sessions(10) +trigger_fn: AfterWatermark(early=AfterCount(2), late=AfterCount(3)) +timestamp_combiner: OUTPUT_AT_EOW +transcript: + - input: [1, 2, 3] + - expect: + - {window: [1, 12], values: [1, 2, 3], timestamp: 13, early: true} + - input: [4] # no output + - input: [5] + - expect: + - {window: [1, 14], values: [1, 2, 3, 4, 5], timestamp: 14, early: true} + - input: [6] + - watermark: 100 + - expect: + - {window: [1, 15], values:[1, 2, 3, 4, 5, 6], timestamp: 16, + final: true} + - input: [1] + - input: [3, 4] + - expect: + - {window: [1, 15], values: [1, 1, 2, 3, 3, 4, 4, 5, 6], timestamp: 16} + +--- +name: garbage_collection +window_fn: FixedWindows(10) +trigger_fn: AfterCount(2) +timestamp_combiner: OUTPUT_AT_EOW +allowed_lateness: 10 +accumulation_mode: discarding +transcript: + - input: [1, 2, 3, 10, 11, 25] + - expect: + - {window: [0, 9], timestamp: 10} + - {window: [10, 19], timestamp: 20} + - state: + present: [[20, 29]] + absent: [[0, 9]] + tombstone: [[10, 19]] + +--- +name: known_late_data_watermark +window_fn: FixedWindows(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_EARLIEST +transcript: + - watermark: 5 + - input: [2, 3, 7, 8] + - watermark: 11 + - expect: + - {window: [0, 9], values: [2, 3, 7, 8], timestamp: 7} + +--- +name: known_late_data_no_watermark_hold_possible +window_fn: FixedWindows(10) +trigger_fn: Default +timestamp_combiner: OUTPUT_AT_EARLIEST +transcript: + - watermark: 8 + - input: [2, 3, 7] + - watermark: 11 + - expect: + - {window: [0, 9], values: [2, 3, 7], timestamp: 10} + +# These next examples test that bad/incomplete transcripts are rejected. +--- +name: bad_output +error: Unmatched output +windowfn: FixedWindows(10) +transcript: + - input: [1, 2, 3] + - expect: + - {window: [0, 9], values: [1, 2, 3]} # bad + - watermark: 100 + +--- +name: bad_expected_values +error: Unmatched output +window_fn: FixedWindows(10) +transcript: + - input: [1, 2, 3] + - watermark: 100 + - expect: + - {window: [0, 9], values: [1, 2]} # bad values + +--- +name: bad_expected_window +error: Unmatched output +window_fn: FixedWindows(10) +transcript: + - input: [1, 2, 3] + - watermark: 100 + - expect: + - {window: [0, 19], values: [1, 2, 3]} # bad window + +--- +name: missing_output +error: Unexpected output +window_fn: FixedWindows(10) +transcript: + - input: [1, 2, 3] + - watermark: 100 + # missing output + - watermark: 200 + +--- +name: missing_output_at_end +error: Unexpected output +window_fn: FixedWindows(10) +transcript: + - input: [1, 2, 3] + - watermark: 100 + # missing output http://git-wip-us.apache.org/repos/asf/beam/blob/911128b2/sdks/python/apache_beam/transforms/trigger_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/transforms/trigger_test.py b/sdks/python/apache_beam/transforms/trigger_test.py index 4a5affb..1ae1f02 100644 --- a/sdks/python/apache_beam/transforms/trigger_test.py +++ b/sdks/python/apache_beam/transforms/trigger_test.py @@ -590,8 +590,9 @@ class TranscriptTest(unittest.TestCase): self.assertEquals([], output, msg='Unexpected output: %s' % output) -TRANSCRIPT_TEST_FILE = os.path.join(os.path.dirname(__file__), - 'trigger_transcripts.yaml') +TRANSCRIPT_TEST_FILE = os.path.join( + os.path.dirname(__file__), '..', 'testing', 'data', + 'trigger_transcripts.yaml') if os.path.exists(TRANSCRIPT_TEST_FILE): TranscriptTest._create_tests(TRANSCRIPT_TEST_FILE) http://git-wip-us.apache.org/repos/asf/beam/blob/911128b2/sdks/python/apache_beam/transforms/trigger_transcripts.yaml ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/transforms/trigger_transcripts.yaml b/sdks/python/apache_beam/transforms/trigger_transcripts.yaml deleted file mode 100644 index a736e94..0000000 --- a/sdks/python/apache_beam/transforms/trigger_transcripts.yaml +++ /dev/null @@ -1,224 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -name: fixed_default -window_fn: FixedWindows(10) # Python names/syntax, unless otherwise noted. -trigger_fn: Default # Same. Empty () may be omitted. -transcript: # Ordered list of events. - - input: [1, 2, 3, 10, 11] # The elements are the timestamps. - - watermark: 25 - - expect: # Every expected output from the last action. - - {window: [0, 9], values: [1, 2, 3]} - - {window: [10, 19], values: [10, 11]} # Partial match on attributes OK. - ---- -name: fixed_default_late_data -window_fn: FixedWindows(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_EOW -transcript: - - input: [1, 2, 3, 10, 11, 25] - - watermark: 100 - - expect: - - {window: [0, 9], values: [1, 2, 3], timestamp: 10, final: false} - - {window: [10, 19], values: [10, 11], timestamp: 20} - - {window: [20, 29], values: [25], timestamp: 30, late: false} - - input: [7] - - expect: - - {window: [0, 9], values: [1, 2, 3, 7], timestamp: 10, late: true} - ---- -name: timestamp_combiner_earliest -window_fn: FixedWindows(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_EARLIEST -transcript: - - input: [1, 2, 3, 10, 11, 25] - - watermark: 100 - - expect: - - {window: [0, 9], values: [1, 2, 3], timestamp: 1, final: false} - - {window: [10, 19], values: [10, 11], timestamp: 10} - - {window: [20, 29], values: [25], timestamp: 25, late: false} - ---- -name: timestamp_combiner_latest -window_fn: FixedWindows(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_LATEST -transcript: - - input: [1, 2, 3, 10, 11, 25] - - watermark: 100 - - expect: - - {window: [0, 9], values: [1, 2, 3], timestamp: 3, final: false} - - {window: [10, 19], values: [10, 11], timestamp: 11} - - {window: [20, 29], values: [25], timestamp: 25, late: false} - ---- -# Test that custom timestamping is not invoked. -name: timestamp_combiner_custom_timestamping_eow -window_fn: CustomTimestampingFixedWindowsWindowFn(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_EOW -transcript: - - input: [1, 2, 3, 10, 11, 25] - - watermark: 100 - - expect: - - {window: [0, 9], values: [1, 2, 3], timestamp: 10, final: false} - - {window: [10, 19], values: [10, 11], timestamp: 20} - - {window: [20, 29], values: [25], timestamp: 30, late: false} - ---- -# Test that custom timestamping is not invoked. -name: timestamp_combiner_custom_timestamping_earliest -window_fn: CustomTimestampingFixedWindowsWindowFn(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_EARLIEST -transcript: - - input: [1, 2, 3, 10, 11, 25] - - watermark: 100 - - expect: - - {window: [0, 9], values: [1, 2, 3], timestamp: 1, final: false} - - {window: [10, 19], values: [10, 11], timestamp: 10} - - {window: [20, 29], values: [25], timestamp: 25, late: false} - ---- -# Test that custom timestamping is in fact invoked. -name: timestamp_combiner_custom_timestamping_earliest -window_fn: CustomTimestampingFixedWindowsWindowFn(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_EARLIEST_TRANSFORMED -transcript: - - input: [1, 2, 3, 10, 11, 25] - - watermark: 100 - - expect: - - {window: [0, 9], values: [1, 2, 3], timestamp: 101, final: false} - - {window: [10, 19], values: [10, 11], timestamp: 110} - - {window: [20, 29], values: [25], timestamp: 125, late: false} - ---- -name: early_late_sessions -window_fn: Sessions(10) -trigger_fn: AfterWatermark(early=AfterCount(2), late=AfterCount(3)) -timestamp_combiner: OUTPUT_AT_EOW -transcript: - - input: [1, 2, 3] - - expect: - - {window: [1, 12], values: [1, 2, 3], timestamp: 13, early: true} - - input: [4] # no output - - input: [5] - - expect: - - {window: [1, 14], values: [1, 2, 3, 4, 5], timestamp: 14, early: true} - - input: [6] - - watermark: 100 - - expect: - - {window: [1, 15], values:[1, 2, 3, 4, 5, 6], timestamp: 16, - final: true} - - input: [1] - - input: [3, 4] - - expect: - - {window: [1, 15], values: [1, 1, 2, 3, 3, 4, 4, 5, 6], timestamp: 16} - ---- -name: garbage_collection -window_fn: FixedWindows(10) -trigger_fn: AfterCount(2) -timestamp_combiner: OUTPUT_AT_EOW -allowed_lateness: 10 -accumulation_mode: discarding -transcript: - - input: [1, 2, 3, 10, 11, 25] - - expect: - - {window: [0, 9], timestamp: 10} - - {window: [10, 19], timestamp: 20} - - state: - present: [[20, 29]] - absent: [[0, 9]] - tombstone: [[10, 19]] - ---- -name: known_late_data_watermark -window_fn: FixedWindows(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_EARLIEST -transcript: - - watermark: 5 - - input: [2, 3, 7, 8] - - watermark: 11 - - expect: - - {window: [0, 9], values: [2, 3, 7, 8], timestamp: 7} - ---- -name: known_late_data_no_watermark_hold_possible -window_fn: FixedWindows(10) -trigger_fn: Default -timestamp_combiner: OUTPUT_AT_EARLIEST -transcript: - - watermark: 8 - - input: [2, 3, 7] - - watermark: 11 - - expect: - - {window: [0, 9], values: [2, 3, 7], timestamp: 10} - -# These next examples test that bad/incomplete transcripts are rejected. ---- -name: bad_output -error: Unmatched output -windowfn: FixedWindows(10) -transcript: - - input: [1, 2, 3] - - expect: - - {window: [0, 9], values: [1, 2, 3]} # bad - - watermark: 100 - ---- -name: bad_expected_values -error: Unmatched output -window_fn: FixedWindows(10) -transcript: - - input: [1, 2, 3] - - watermark: 100 - - expect: - - {window: [0, 9], values: [1, 2]} # bad values - ---- -name: bad_expected_window -error: Unmatched output -window_fn: FixedWindows(10) -transcript: - - input: [1, 2, 3] - - watermark: 100 - - expect: - - {window: [0, 19], values: [1, 2, 3]} # bad window - ---- -name: missing_output -error: Unexpected output -window_fn: FixedWindows(10) -transcript: - - input: [1, 2, 3] - - watermark: 100 - # missing output - - watermark: 200 - ---- -name: missing_output_at_end -error: Unexpected output -window_fn: FixedWindows(10) -transcript: - - input: [1, 2, 3] - - watermark: 100 - # missing output
