charlespnh commented on code in PR #35715:
URL: https://github.com/apache/beam/pull/35715#discussion_r2240177185
##########
sdks/python/apache_beam/yaml/yaml_ml.py:
##########
@@ -29,11 +31,28 @@
from apache_beam.yaml import options
from apache_beam.yaml.yaml_utils import SafeLineLoader
+
+def list_submodules(package):
+ """
+ Lists all submodules within a given package.
+ """
+ submodules = []
+ for _, module_name, _ in pkgutil.walk_packages(
+ package.__path__, package.__name__ + '.'):
+ if 'test' in module_name:
+ continue
+ submodules.append(module_name)
+ return submodules
+
+
try:
from apache_beam.ml.transforms import tft
from apache_beam.ml.transforms.base import MLTransform
# TODO(robertwb): Is this all of them?
- _transform_constructors = tft.__dict__
+ _transform_constructors = {}
+ for module_name in list_submodules(beam.ml.transforms):
+ module = import_module(module_name)
+ _transform_constructors |= module.__dict__
except ImportError:
Review Comment:
Just want to understand the behaviour here: if my pipeline only uses TFT
transforms, but this loop also tries to import the other embedding-transform
submodules, whose dependencies (e.g. openai) may not be installed, does that
mean `import_module` will raise and we end up in the `except` block where
`tft = None`?
... In other words, does installing these dependencies become mandatory even
though I'm not using those transforms in my pipeline?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]