ahmedabu98 commented on code in PR #29561: URL: https://github.com/apache/beam/pull/29561#discussion_r1417935609
########## sdks/python/apache_beam/transforms/wrapper_provider.py: ########## @@ -0,0 +1,186 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import logging +import typing +from collections import namedtuple + +from apache_beam.transforms import PTransform +from apache_beam.transforms.external import BeamJarExpansionService +from apache_beam.transforms.external import SchemaAwareExternalTransform +from apache_beam.transforms.external import SchemaTransformsConfig +from apache_beam.typehints.schemas import named_tuple_to_schema +from apache_beam.typehints.schemas import typing_from_runner_api + + +def snake_case_to_upper_camel_case(string): + """Convert snake_case to UpperCamelCase""" + components = string.split('_') + output = ''.join(n.capitalize() for n in components) + return output + + +def snake_case_to_lower_camel_case(string): + """Convert snake_case to lowerCamelCase""" + if len(string) <= 1: + return string.lower() + upper = snake_case_to_upper_camel_case(string) + return upper[0].lower() + upper[1:] + + +def camel_case_to_snake_case(string): + """Convert camelCase to snake_case""" + arr = ['_' + n.lower() if n.isupper() else n for n in string] + return ''.join(arr).lstrip('_') + + +# Information regarding a Wrapper 
parameter. +ParamInfo = namedtuple('ParamInfo', ['type', 'description', 'original_name']) + + +def get_config_with_descriptions(schematransform: SchemaTransformsConfig): + # Prepare a configuration schema that includes types and descriptions + schema = named_tuple_to_schema(schematransform.configuration_schema) + descriptions = schematransform.configuration_schema._field_descriptions + fields_with_descriptions = {} + for field in schema.fields: + fields_with_descriptions[camel_case_to_snake_case(field.name)] = ParamInfo( + typing_from_runner_api(field.type), + descriptions[field.name], + field.name) + + return fields_with_descriptions + + +class Wrapper(PTransform): + """Template for a SchemaTransform Python wrappeer""" + + # These attributes need to be set when a Wrapper type is created + default_expansion_service = None + identifier = None + + def __init__(self, expansion_service=None, **kwargs): + self._kwargs = kwargs + self._expansion_service = \ + expansion_service or self.default_expansion_service + self.schematransform: SchemaTransformsConfig = \ + SchemaAwareExternalTransform.discover_config( + self._expansion_service, self.identifier) + + def expand(self, input): + camel_case_kwargs = { + snake_case_to_lower_camel_case(k): v + for k, v in self._kwargs.items() + } + + external_schematransform = SchemaAwareExternalTransform( + identifier=self.identifier, + expansion_service=self._expansion_service, + rearrange_based_on_discovery=True, + **camel_case_kwargs) + + input_tags = self.schematransform.inputs + # TODO(ahmedabu98): how do we handle the case of multiple input pcolls? Review Comment: P.S. I've been trying a few things and the following line seems to work for all cases somehow? 
`return input | external_schematransform`

- Obviously works for a dict of tagged PCollections.
- Works for sending one untagged PCollection (even though SchemaTransforms expect a PCollectionRowTuple of tagged PCollections).
- Works when there are 0 inputs and we apply straight to the pipeline (`p | external_transform...`).

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at: [email protected]
