AnandInguva commented on code in PR #29564:
URL: https://github.com/apache/beam/pull/29564#discussion_r1414444997
##########
sdks/python/apache_beam/ml/transforms/base.py:
##########
@@ -254,3 +371,243 @@ def _increment_counters():
pipeline
| beam.Create([None])
| beam.Map(lambda _: _increment_counters()))
+
+
+class _TransformAttributeManager:
+ """
+ Base class used for saving and loading the attributes.
+ """
+ @staticmethod
+ def save_attributes(artifact_location):
+ """
+ Save the attributes to json file using stdlib json.
+ """
+ raise NotImplementedError
+
+ @staticmethod
+ def load_attributes(artifact_location):
+ """
+ Load the attributes from json file.
+ """
+ raise NotImplementedError
+
+
+class _JsonPickleTransformAttributeManager(_TransformAttributeManager):
+ """
+ Use Jsonpickle to save and load the attributes. Here the attributes refer
+ to the list of PTransforms that are used to process the data.
+
+ jsonpickle is used to serialize the PTransforms and save it to a json file and
+ is compatible across python versions.
+ """
+ @staticmethod
+ def _is_remote_path(path):
+ is_gcs = path.find('gs://') != -1
+ # TODO: Add support for other remote paths.
+ if not is_gcs and path.find('://') != -1:
+ raise RuntimeError(
+ "Artifact locations are currently supported for only available for "
+ "local paths and GCS paths. Got: %s" % path)
+ return is_gcs
+
+ @staticmethod
+ def save_attributes(
+ ptransform_list,
+ artifact_location,
+ **kwargs,
+ ):
+ if _JsonPickleTransformAttributeManager._is_remote_path(artifact_location):
+ try:
+ options = kwargs.get('options')
+ except KeyError:
+ raise RuntimeError(
+ 'pipeline options are required to save the attributes.'
+ 'in the artifact location %s' % artifact_location)
Review Comment:
Is the client here the DataflowApplicationClient?
So we create that client here instead of below?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]