robertwb commented on code in PR #27338:
URL: https://github.com/apache/beam/pull/27338#discussion_r1266017973
##########
sdks/python/apache_beam/yaml/yaml_transform_scope_test.py:
##########
@@ -0,0 +1,213 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+import unittest
+
+import yaml
+
+import apache_beam as beam
+from apache_beam.yaml import yaml_provider
+from apache_beam.yaml.yaml_transform import SafeLineLoader
+from apache_beam.yaml.yaml_transform import Scope
+
+
+class ScopeTest(unittest.TestCase):
+  def get_scope_by_spec(self, p, spec):
+    spec = yaml.load(spec, Loader=SafeLineLoader)
+
+    scope = Scope(
+        beam.pvalue.PBegin(p), {},
+        spec['transforms'],
+        yaml_provider.standard_providers(), {})
+    return scope, spec
+
+  def test_get_pcollection_input(self):
+    with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
+        pickle_library='cloudpickle')) as p:
+      elements = p | beam.Create(range(3))
+      scope = Scope(
+          p, {'input': elements},
+          transforms=[],
+          providers=yaml_provider.standard_providers(),
+          input_providers={})
+
+      result = scope.get_pcollection('input')
+      self.assertEqual("PCollection[Create/Map(decode).None]", str(result))
+
+  def test_get_pcollection_output(self):
+    with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
+        pickle_library='cloudpickle')) as p:
+      spec = '''
+        transforms:
+          - type: Create
+            elements: [0, 1, 3, 4]
+          - type: PyMap
+            name: Square
+            input: Create
+            fn: "lambda x: x*x"
+        '''
+
+      scope, spec = self.get_scope_by_spec(p, spec)
+
+      result = scope.get_pcollection("Create")
+      self.assertEqual("PCollection[Create/Map(decode).None]", str(result))
+
+      result = scope.get_pcollection("Square")
+      self.assertEqual("PCollection[Square.None]", str(result))
+
+      result = scope.get_pcollection("PyMap")
+      self.assertEqual("PCollection[Square.None]", str(result))
+
+  def test_get_pcollection_ambigous_name(self):
+    with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
+        pickle_library='cloudpickle')) as p:
+      spec = '''
+        transforms:
+          - type: Create
+            elements: [0, 1, 3, 4]
+          - type: PyMap
+            fn: "lambda x: x*x"
+          - type: PyMap
+            fn: "lambda x: x*x*x"
+        '''
+
+      scope, spec = self.get_scope_by_spec(p, spec)
+
+      with self.assertRaisesRegex(ValueError, r'Ambiguous.*'):
+        scope.get_pcollection("PyMap")
+
+  def test_unique_name_by_name(self):
+    with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
+        pickle_library='cloudpickle')) as p:
+      spec = '''
+        transforms:
+          - type: Create
+            name: MyElements
+            elements: [0, 1, 3, 4]
+        '''
+      scope, spec = self.get_scope_by_spec(p, spec)
+
+      spec_transform = spec['transforms'][0]
+      p_transform = scope.create_ptransform(spec_transform, [])
+
+      result = scope.unique_name(spec_transform, p_transform)
+      self.assertEqual(result, "MyElements")
+      self.assertIn("MyElements", scope._seen_names)
+
+      result = scope.unique_name(spec_transform, p_transform)

Review Comment:
   I wouldn't bother testing this one at all. (Maybe it should be renamed to be private...)
##########
sdks/python/apache_beam/yaml/yaml_transform_ut_test.py:
##########
@@ -0,0 +1,301 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import logging
+import unittest
+
+import yaml
+
+import apache_beam as beam
+from apache_beam.yaml import yaml_provider
+from apache_beam.yaml import yaml_transform
+from apache_beam.yaml.yaml_transform import LightweightScope
+from apache_beam.yaml.yaml_transform import SafeLineLoader
+from apache_beam.yaml.yaml_transform import Scope
+from apache_beam.yaml.yaml_transform import expand_composite_transform
+from apache_beam.yaml.yaml_transform import expand_leaf_transform
+from apache_beam.yaml.yaml_transform import pipeline_as_composite
+
+
+class YamlTransformTest(unittest.TestCase):
+  def test_only_element(self):
+    self.assertEqual(yaml_transform.only_element((1, )), 1)
+
+
+class SafeLineLoaderTest(unittest.TestCase):
+  def test_get_line(self):
+    pipeline_yaml = '''
+        type: composite
+        input:
+            elements: input
+        transforms:
+          - type: PyMap
+            name: Square
+            input: elements
+            fn: "lambda x: x * x"
+          - type: PyMap
+            name: Cube
+            input: elements
+            fn: "lambda x: x * x * x"
+        output:
+          Flatten
+        '''
+    spec = yaml.load(pipeline_yaml, Loader=SafeLineLoader)
+    self.assertEqual(SafeLineLoader.get_line(spec['type']), 2)
+    self.assertEqual(SafeLineLoader.get_line(spec['input']), 4)
+    self.assertEqual(SafeLineLoader.get_line(spec['transforms'][0]), 6)
+    self.assertEqual(SafeLineLoader.get_line(spec['transforms'][0]['type']), 6)
+    self.assertEqual(SafeLineLoader.get_line(spec['transforms'][0]['name']), 7)
+    self.assertEqual(SafeLineLoader.get_line(spec['transforms'][1]), 10)
+    self.assertEqual(SafeLineLoader.get_line(spec['output']), 15)
+    self.assertEqual(SafeLineLoader.get_line(spec['transforms']), "unknown")
+
+  def test_strip_metadata(self):
+    spec_yaml = '''
+        transforms:
+          - type: PyMap
+            name: Square
+        '''
+    spec = yaml.load(spec_yaml, Loader=SafeLineLoader)
+    stripped = SafeLineLoader.strip_metadata(spec['transforms'])
+
+    self.assertFalse(hasattr(stripped[0], '__line__'))
+    self.assertFalse(hasattr(stripped[0], '__uuid__'))
+
+  def test_strip_metadata_nothing_to_strip(self):
+    spec_yaml = 'prop: 123'
+    spec = yaml.load(spec_yaml, Loader=SafeLineLoader)
+    stripped = SafeLineLoader.strip_metadata(spec['prop'])
+
+    self.assertFalse(hasattr(stripped, '__line__'))
+    self.assertFalse(hasattr(stripped, '__uuid__'))
+
+
+class LightweightScopeTest(unittest.TestCase):
+  @staticmethod
+  def get_spec():
+    pipeline_yaml = '''
+        - type: PyMap
+          name: Square
+          input: elements
+          fn: "lambda x: x * x"
+        - type: PyMap
+          name: PyMap
+          input: elements
+          fn: "lambda x: x * x * x"
+        - type: Filter
+          name: FilterOutBigNumbers
+          input: PyMap
+          keep: "lambda x: x<100"
+        '''
+    return yaml.load(pipeline_yaml, Loader=SafeLineLoader)
+
+  def test_init(self):
+    spec = self.get_spec()
+    scope = LightweightScope(spec)
+    self.assertEqual(len(scope._transforms_by_uuid), 3)
+    self.assertCountEqual(
+        list(scope._uuid_by_name.keys()),
+        ["PyMap", "Square", "Filter", "FilterOutBigNumbers"])
+
+  def test_get_transform_id_and_output_name(self):
+    spec = self.get_spec()
+    scope = LightweightScope(spec)
+    transform_id, output = scope.get_transform_id_and_output_name("Square")
+    self.assertEqual(transform_id, spec[0]['__uuid__'])
+    self.assertEqual(output, None)
+
+  def test_get_transform_id_and_output_name_with_dot(self):
+    spec = self.get_spec()
+    scope = LightweightScope(spec)
+    transform_id, output = \
+        scope.get_transform_id_and_output_name("Square.OutputName")
+    self.assertEqual(transform_id, spec[0]['__uuid__'])
+    self.assertEqual(output, "OutputName")
+
+  def test_get_transform_id_by_uuid(self):
+    spec = self.get_spec()
+    scope = LightweightScope(spec)
+    transform_id = scope.get_transform_id(spec[0]['__uuid__'])
+    self.assertEqual(transform_id, spec[0]['__uuid__'])
+
+  def test_get_transform_id_by_unique_name(self):
+    spec = self.get_spec()
+    scope = LightweightScope(spec)
+    transform_id = scope.get_transform_id("Square")
+    self.assertEqual(transform_id, spec[0]['__uuid__'])
+
+  def test_get_transform_id_by_ambiguous_name(self):
+    spec = self.get_spec()
+    scope = LightweightScope(spec)
+    with self.assertRaisesRegex(ValueError, r'Ambiguous.*PyMap'):
+      scope.get_transform_id(scope.get_transform_id(spec[1]['name']))
+
+  def test_get_transform_id_by_unknown_name(self):
+    spec = self.get_spec()
+    scope = LightweightScope(spec)
+    with self.assertRaisesRegex(ValueError, r'Unknown.*NotExistingTransform'):
+      scope.get_transform_id("NotExistingTransform")
+
+
+def new_pipeline():
+  return beam.Pipeline(
+      options=beam.options.pipeline_options.PipelineOptions(
+          pickle_library='cloudpickle'))
+
+
+class MainTest(unittest.TestCase):
+  def get_scope_by_spec(self, p, spec, inputs=None):
+    if inputs is None:
+      inputs = {}
+    spec = yaml.load(spec, Loader=SafeLineLoader)
+
+    scope = Scope(
+        beam.pvalue.PBegin(p),
+        inputs,
+        spec['transforms'],
+        yaml_provider.standard_providers(), {})
+    return scope, spec
+
+  def test_expand_leaf_transform_with_input(self):
+    with new_pipeline() as p:
+      spec = '''
+        transforms:
+          - type: Create
+            elements: [0]
+          - type: PyMap
+            input: Create
+            fn: "lambda x: x*x"
+        '''
+      scope, spec = self.get_scope_by_spec(p, spec)
+      result = expand_leaf_transform(spec['transforms'][1], scope)

Review Comment:
   Yeah, I'd remove this test. I think the guiding principle for testing internal things is "if this changed its behavior, would things break in inscrutable ways?"
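   For example, a rough sketch of what a behavior-level test might look like instead, going through the public `YamlTransform` entry point rather than `expand_leaf_transform`. The YAML spec and asserted values below are illustrative, not taken from this PR:

   ```python
   # A rough sketch, not part of the PR: assert on observable pipeline output
   # via the public YamlTransform entry point instead of internal helpers.
   import apache_beam as beam
   from apache_beam.options.pipeline_options import PipelineOptions
   from apache_beam.testing.util import assert_that
   from apache_beam.testing.util import equal_to
   from apache_beam.yaml.yaml_transform import YamlTransform

   with beam.Pipeline(
       options=PipelineOptions(pickle_library='cloudpickle')) as p:
     elements = p | beam.Create([1, 2, 3])
     result = elements | YamlTransform(
         '''
         type: composite
         input:
           elements: input
         transforms:
           - type: PyMap
             name: Square
             input: elements
             fn: "lambda x: x * x"
         output: Square
         ''')
     # If the internal expansion logic changed incorrectly, this assertion
     # would fail in an obvious, behavior-level way.
     assert_that(result, equal_to([1, 4, 9]))
   ```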
-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]