robertwb commented on code in PR #27338: URL: https://github.com/apache/beam/pull/27338#discussion_r1260357066
########## sdks/python/apache_beam/yaml/yaml_transform_scope_test.py: ########## @@ -0,0 +1,213 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope + + +class ScopeTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec): + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), {}, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_get_pcollection_input(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + elements = p | beam.Create(range(3)) + scope = Scope( + p, {'input': elements}, + transforms=[], + providers=yaml_provider.standard_providers(), + input_providers={}) + + result = scope.get_pcollection('input') + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + def test_get_pcollection_output(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = 
''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + name: Square + input: Create + fn: "lambda x: x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + result = scope.get_pcollection("Create") Review Comment: Nit: rather than re-assigning result, just do ``` self.assertEqual( "PCollection[Create/Map(decode).None]", str(scope.get_pcollection("Create"))) ``` ########## sdks/python/apache_beam/yaml/yaml_transform_scope_test.py: ########## @@ -0,0 +1,213 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope + + +class ScopeTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec): + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), {}, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_get_pcollection_input(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + elements = p | beam.Create(range(3)) + scope = Scope( + p, {'input': elements}, + transforms=[], + providers=yaml_provider.standard_providers(), + input_providers={}) + + result = scope.get_pcollection('input') + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + def test_get_pcollection_output(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + name: Square + input: Create + fn: "lambda x: x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + result = scope.get_pcollection("Create") + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + result = scope.get_pcollection("Square") + self.assertEqual("PCollection[Square.None]", str(result)) + + result = scope.get_pcollection("PyMap") + self.assertEqual("PCollection[Square.None]", str(result)) Review Comment: Maybe test getting the same PCollection twice returns the exact same PCollection? ########## sdks/python/apache_beam/yaml/yaml_transform_scope_test.py: ########## @@ -0,0 +1,213 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope + + +class ScopeTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec): + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), {}, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_get_pcollection_input(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + elements = p | beam.Create(range(3)) + scope = Scope( + p, {'input': elements}, + transforms=[], + providers=yaml_provider.standard_providers(), + input_providers={}) + + result = scope.get_pcollection('input') + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + def test_get_pcollection_output(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + name: Square + input: Create + fn: "lambda x: x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + result = 
scope.get_pcollection("Create") + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + result = scope.get_pcollection("Square") + self.assertEqual("PCollection[Square.None]", str(result)) + + result = scope.get_pcollection("PyMap") + self.assertEqual("PCollection[Square.None]", str(result)) + + def test_get_pcollection_ambigous_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + fn: "lambda x: x*x" + - type: PyMap + fn: "lambda x: x*x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + with self.assertRaisesRegex(ValueError, r'Ambiguous.*'): + scope.get_pcollection("PyMap") + + def test_unique_name_by_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + name: MyElements + elements: [0, 1, 3, 4] + ''' + scope, spec = self.get_scope_by_spec(p, spec) + + spec_transform = spec['transforms'][0] + p_transform = scope.create_ptransform(spec_transform, []) + + result = scope.unique_name(spec_transform, p_transform) + self.assertEqual(result, "MyElements") + self.assertIn("MyElements", scope._seen_names) + + result = scope.unique_name(spec_transform, p_transform) Review Comment: We probably shouldn't be giving a new name when passing the same spec twice, but I guess that's not a realistic scenario. TBD if we want to disallow transforms with the same explicit name. ########## sdks/python/apache_beam/yaml/yaml_transform_scope_test.py: ########## @@ -0,0 +1,213 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope + + +class ScopeTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec): + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), {}, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_get_pcollection_input(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + elements = p | beam.Create(range(3)) + scope = Scope( + p, {'input': elements}, + transforms=[], + providers=yaml_provider.standard_providers(), + input_providers={}) + + result = scope.get_pcollection('input') + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + def test_get_pcollection_output(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + name: Square + input: Create + fn: "lambda x: x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + result = scope.get_pcollection("Create") + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + result = 
scope.get_pcollection("Square") + self.assertEqual("PCollection[Square.None]", str(result)) + + result = scope.get_pcollection("PyMap") + self.assertEqual("PCollection[Square.None]", str(result)) + + def test_get_pcollection_ambigous_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + fn: "lambda x: x*x" + - type: PyMap + fn: "lambda x: x*x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + with self.assertRaisesRegex(ValueError, r'Ambiguous.*'): + scope.get_pcollection("PyMap") + + def test_unique_name_by_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + name: MyElements + elements: [0, 1, 3, 4] + ''' + scope, spec = self.get_scope_by_spec(p, spec) + + spec_transform = spec['transforms'][0] + p_transform = scope.create_ptransform(spec_transform, []) + + result = scope.unique_name(spec_transform, p_transform) + self.assertEqual(result, "MyElements") + self.assertIn("MyElements", scope._seen_names) + + result = scope.unique_name(spec_transform, p_transform) + self.assertIn("MyElements@3", scope._seen_names) + self.assertEqual(result, "MyElements@3") + + def test_unique_name_by_label(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + ''' + scope, spec = self.get_scope_by_spec(p, spec) + + spec_transform = spec['transforms'][0] + p_transform = scope.create_ptransform(spec_transform, []) + + result = scope.unique_name(spec_transform, p_transform) + self.assertEqual(result, "Create") + self.assertIn("Create", scope._seen_names) + + result = scope.unique_name(spec_transform, p_transform) Review Comment: Again, we shouldn't be calling this on the 
same transform twice. ########## sdks/python/apache_beam/yaml/yaml_transform_ut_test.py: ########## @@ -0,0 +1,301 @@ +# Review Comment: I'm assuming you'll merge with #27086 and move rather than duplicate these? ########## sdks/python/apache_beam/yaml/yaml_transform_scope_test.py: ########## @@ -0,0 +1,213 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope + + +class ScopeTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec): + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), {}, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_get_pcollection_input(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + elements = p | beam.Create(range(3)) + scope = Scope( + p, {'input': elements}, + transforms=[], + providers=yaml_provider.standard_providers(), + input_providers={}) + + result = scope.get_pcollection('input') + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + def test_get_pcollection_output(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + name: Square + input: Create + fn: "lambda x: x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + result = scope.get_pcollection("Create") + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + result = scope.get_pcollection("Square") + self.assertEqual("PCollection[Square.None]", str(result)) + + result = scope.get_pcollection("PyMap") + self.assertEqual("PCollection[Square.None]", str(result)) + + def test_get_pcollection_ambigous_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + fn: "lambda x: x*x" + - type: PyMap + fn: "lambda x: x*x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + with 
self.assertRaisesRegex(ValueError, r'Ambiguous.*'): Review Comment: I'm on the fence whether this is redundant with the existing test_name_is_ambiguous test (though that could be strengthened to use assertRaisesRegex). ########## sdks/python/apache_beam/yaml/yaml_transform_ut_test.py: ########## @@ -0,0 +1,301 @@ +# Review Comment: Also, maybe call this file "_unit_test" rather than "ut_test" (took me a bit to figure out what "ut" was). ########## sdks/python/apache_beam/yaml/yaml_transform_scope_test.py: ########## @@ -0,0 +1,213 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope + + +class ScopeTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec): + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), {}, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_get_pcollection_input(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + elements = p | beam.Create(range(3)) + scope = Scope( + p, {'input': elements}, + transforms=[], + providers=yaml_provider.standard_providers(), + input_providers={}) + + result = scope.get_pcollection('input') + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + def test_get_pcollection_output(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + name: Square + input: Create + fn: "lambda x: x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + result = scope.get_pcollection("Create") + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + result = scope.get_pcollection("Square") + self.assertEqual("PCollection[Square.None]", str(result)) + + result = scope.get_pcollection("PyMap") + self.assertEqual("PCollection[Square.None]", str(result)) + + def test_get_pcollection_ambigous_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( Review Comment: Is this needed/used here? ########## sdks/python/apache_beam/yaml/yaml_transform_ut_test.py: ########## @@ -0,0 +1,301 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml import yaml_transform +from apache_beam.yaml.yaml_transform import LightweightScope +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope +from apache_beam.yaml.yaml_transform import expand_composite_transform +from apache_beam.yaml.yaml_transform import expand_leaf_transform +from apache_beam.yaml.yaml_transform import pipeline_as_composite + + +class YamlTransformTest(unittest.TestCase): + def test_only_element(self): + self.assertEqual(yaml_transform.only_element((1, )), 1) + + +class SafeLineLoaderTest(unittest.TestCase): + def test_get_line(self): + pipeline_yaml = ''' + type: composite + input: + elements: input + transforms: + - type: PyMap + name: Square + input: elements + fn: "lambda x: x * x" + - type: PyMap + name: Cube + input: elements + fn: "lambda x: x * x * x" + output: + Flatten + ''' + spec = yaml.load(pipeline_yaml, Loader=SafeLineLoader) + self.assertEqual(SafeLineLoader.get_line(spec['type']), 2) + self.assertEqual(SafeLineLoader.get_line(spec['input']), 4) + self.assertEqual(SafeLineLoader.get_line(spec['transforms'][0]), 6) + 
self.assertEqual(SafeLineLoader.get_line(spec['transforms'][0]['type']), 6) + self.assertEqual(SafeLineLoader.get_line(spec['transforms'][0]['name']), 7) + self.assertEqual(SafeLineLoader.get_line(spec['transforms'][1]), 10) + self.assertEqual(SafeLineLoader.get_line(spec['output']), 15) + self.assertEqual(SafeLineLoader.get_line(spec['transforms']), "unknown") + + def test_strip_metadata(self): + spec_yaml = ''' + transforms: + - type: PyMap + name: Square + ''' + spec = yaml.load(spec_yaml, Loader=SafeLineLoader) + stripped = SafeLineLoader.strip_metadata(spec['transforms']) + + self.assertFalse(hasattr(stripped[0], '__line__')) + self.assertFalse(hasattr(stripped[0], '__uuid__')) + + def test_strip_metadata_nothing_to_strip(self): + spec_yaml = 'prop: 123' + spec = yaml.load(spec_yaml, Loader=SafeLineLoader) + stripped = SafeLineLoader.strip_metadata(spec['prop']) + + self.assertFalse(hasattr(stripped, '__line__')) + self.assertFalse(hasattr(stripped, '__uuid__')) + + +class LightweightScopeTest(unittest.TestCase): + @staticmethod + def get_spec(): + pipeline_yaml = ''' + - type: PyMap + name: Square + input: elements + fn: "lambda x: x * x" + - type: PyMap + name: PyMap + input: elements + fn: "lambda x: x * x * x" + - type: Filter + name: FilterOutBigNumbers + input: PyMap + keep: "lambda x: x<100" + ''' + return yaml.load(pipeline_yaml, Loader=SafeLineLoader) + + def test_init(self): + spec = self.get_spec() + scope = LightweightScope(spec) + self.assertEqual(len(scope._transforms_by_uuid), 3) + self.assertCountEqual( + list(scope._uuid_by_name.keys()), + ["PyMap", "Square", "Filter", "FilterOutBigNumbers"]) + + def test_get_transform_id_and_output_name(self): + spec = self.get_spec() + scope = LightweightScope(spec) + transform_id, output = scope.get_transform_id_and_output_name("Square") + self.assertEqual(transform_id, spec[0]['__uuid__']) + self.assertEqual(output, None) + + def test_get_transform_id_and_output_name_with_dot(self): + spec = 
self.get_spec() + scope = LightweightScope(spec) + transform_id, output = \ + scope.get_transform_id_and_output_name("Square.OutputName") + self.assertEqual(transform_id, spec[0]['__uuid__']) + self.assertEqual(output, "OutputName") + + def test_get_transform_id_by_uuid(self): + spec = self.get_spec() + scope = LightweightScope(spec) + transform_id = scope.get_transform_id(spec[0]['__uuid__']) + self.assertEqual(transform_id, spec[0]['__uuid__']) + + def test_get_transform_id_by_unique_name(self): + spec = self.get_spec() + scope = LightweightScope(spec) + transform_id = scope.get_transform_id("Square") + self.assertEqual(transform_id, spec[0]['__uuid__']) + + def test_get_transform_id_by_ambiguous_name(self): + spec = self.get_spec() + scope = LightweightScope(spec) + with self.assertRaisesRegex(ValueError, r'Ambiguous.*PyMap'): + scope.get_transform_id(scope.get_transform_id(spec[1]['name'])) + + def test_get_transform_id_by_unknown_name(self): + spec = self.get_spec() + scope = LightweightScope(spec) + with self.assertRaisesRegex(ValueError, r'Unknown.*NotExistingTransform'): + scope.get_transform_id("NotExistingTransform") + + +def new_pipeline(): + return beam.Pipeline( + options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) + + +class MainTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec, inputs=None): + if inputs is None: + inputs = {} + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), + inputs, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_expand_leaf_transform_with_input(self): + with new_pipeline() as p: + spec = ''' + transforms: + - type: Create + elements: [0] + - type: PyMap + input: Create + fn: "lambda x: x*x" + ''' + scope, spec = self.get_scope_by_spec(p, spec) + result = expand_leaf_transform(spec['transforms'][1], scope) Review Comment: It's a little unclear what you're trying to test here. 
I am a bit wary of making tests that are too tied to the particular implementation. Generally I think we want to focus on testing things that (1) are part of the public API and/or (2) invariants or behaviors that if broken would cause issues with the feature. ########## sdks/python/apache_beam/yaml/yaml_transform_scope_test.py: ########## @@ -0,0 +1,213 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import logging +import unittest + +import yaml + +import apache_beam as beam +from apache_beam.yaml import yaml_provider +from apache_beam.yaml.yaml_transform import SafeLineLoader +from apache_beam.yaml.yaml_transform import Scope + + +class ScopeTest(unittest.TestCase): + def get_scope_by_spec(self, p, spec): + spec = yaml.load(spec, Loader=SafeLineLoader) + + scope = Scope( + beam.pvalue.PBegin(p), {}, + spec['transforms'], + yaml_provider.standard_providers(), {}) + return scope, spec + + def test_get_pcollection_input(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + elements = p | beam.Create(range(3)) + scope = Scope( + p, {'input': elements}, + transforms=[], + providers=yaml_provider.standard_providers(), + input_providers={}) + + result = scope.get_pcollection('input') + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + def test_get_pcollection_output(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + name: Square + input: Create + fn: "lambda x: x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + result = scope.get_pcollection("Create") + self.assertEqual("PCollection[Create/Map(decode).None]", str(result)) + + result = scope.get_pcollection("Square") + self.assertEqual("PCollection[Square.None]", str(result)) + + result = scope.get_pcollection("PyMap") + self.assertEqual("PCollection[Square.None]", str(result)) + + def test_get_pcollection_ambigous_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + elements: [0, 1, 3, 4] + - type: PyMap + fn: "lambda x: x*x" + - type: PyMap + fn: "lambda x: x*x*x" + ''' + + scope, spec = self.get_scope_by_spec(p, spec) + + with 
self.assertRaisesRegex(ValueError, r'Ambiguous.*'): + scope.get_pcollection("PyMap") + + def test_unique_name_by_name(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + spec = ''' + transforms: + - type: Create + name: MyElements + elements: [0, 1, 3, 4] + ''' + scope, spec = self.get_scope_by_spec(p, spec) + + spec_transform = spec['transforms'][0] + p_transform = scope.create_ptransform(spec_transform, []) + + result = scope.unique_name(spec_transform, p_transform) + self.assertEqual(result, "MyElements") + self.assertIn("MyElements", scope._seen_names) + + result = scope.unique_name(spec_transform, p_transform) + self.assertIn("MyElements@3", scope._seen_names) + self.assertEqual(result, "MyElements@3") + + def test_unique_name_by_label(self): Review Comment: _by_type rather than _by_label? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
