liferoad commented on code in PR #35774:
URL: https://github.com/apache/beam/pull/35774#discussion_r2267985235
##########
sdks/python/apache_beam/yaml/yaml_ml.py:
##########
@@ -514,11 +509,36 @@ def ml_transform(
   options.YamlOptions.check_enabled(pcoll.pipeline, 'ML')
   # TODO(robertwb): Perhaps _config_to_obj could be pushed into MLTransform
   # itself for better cross-language support?
-  return pcoll | MLTransform(
+  result = pcoll | MLTransform(
       write_artifact_location=write_artifact_location,
       read_artifact_location=read_artifact_location,
       transforms=[_config_to_obj(t) for t in transforms] if transforms else [])
+  if transforms and any(t.get('type') == 'SentenceTransformerEmbeddings'
+                        for t in transforms):
+    from apache_beam.typehints import List
+    try:
+      if pcoll.element_type:
+        new_fields = named_fields_from_element_type(pcoll.element_type)
+        columns_to_change = set()
+        for t_spec in transforms:
+          if t_spec.get('type') == 'SentenceTransformerEmbeddings':
+            columns_to_change.update(
+                t_spec.get('config', {}).get('columns', []))
+
+        final_fields = []
+        for name, typ in new_fields:
+          if name in columns_to_change:
+            final_fields.append((name, List[float]))
+          else:
+            final_fields.append((name, typ))
+        output_schema = RowTypeConstraint.from_fields(final_fields)
+        return result | beam.Map(lambda x: x).with_output_types(output_schema)

Review Comment:
   Good idea. Done.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@beam.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org