gemini-code-assist[bot] commented on code in PR #38769:
URL: https://github.com/apache/beam/pull/38769#discussion_r3341187524
##########
sdks/python/apache_beam/ml/transforms/base_test.py:
##########
@@ -841,6 +843,46 @@ def test_save_and_load_run_inference(self):
self.assertListEqual(
get_keys(model_handler), get_keys(loaded_model_handler))
+ @parameterized.expand([
+ # Pipelines pinned to a version older than 2.75.0 keep the pre-2.75.0
+ # jsonpickle behavior (safe=False, which permits eval-based decoding).
+ param(update_compatibility_version='2.74.0', expected_safe=False),
+ # The breaking-change version itself and newer decode securely.
+ param(update_compatibility_version='2.75.0', expected_safe=True),
+ # Pipelines that do not set the option (the common case) decode securely.
+ param(update_compatibility_version=None, expected_safe=True),
+ ])
+ def test_load_attributes_safe_flag_follows_compat_version(
+ self, update_compatibility_version, expected_safe):
+ data = [{'x': 'Hello world'}, {'x': 'Apache Beam'}]
+ with beam.Pipeline() as p:
+ _ = (
+ p
+ | beam.Create(data)
+ | base.MLTransform(
+ write_artifact_location=self.artifact_location).with_transform(
+ FakeEmbeddingsManager(columns=['x'])))
+
+ # FakeEmbeddingsManager reverses the values of the embedded columns.
+ expected_data = [{'x': d['x'][::-1]} for d in data]
+
+ options = PipelineOptions(
+ update_compatibility_version=update_compatibility_version)
+ with mock.patch.object(base.jsonpickle,
+ 'decode',
+ wraps=base.jsonpickle.decode) as mock_decode:
+ with beam.Pipeline(options=options) as p:
+ result = (
+ p
+ | beam.Create(data)
+ | base.MLTransform(read_artifact_location=self.artifact_location))
+ assert_that(result, equal_to(expected_data))
+
+ safe_flags = [
+ call.kwargs.get('safe') for call in mock_decode.call_args_list
+ ]
+ self.assertEqual(safe_flags, [expected_safe])
Review Comment:

To prevent test failures on environments running `jsonpickle < 4.0.0`, we
should conditionally assert the `safe` flag only if the parameter is supported
by the installed version of `jsonpickle`.
```python
import inspect
if 'safe' in inspect.signature(base.jsonpickle.decode).parameters:
safe_flags = [
call.kwargs.get('safe') for call in mock_decode.call_args_list
]
self.assertEqual(safe_flags, [expected_safe])
else:
self.assertTrue(mock_decode.called)
```
##########
sdks/python/apache_beam/ml/transforms/base.py:
##########
@@ -591,7 +592,18 @@ def save_attributes(
def load_attributes(artifact_location):
with FileSystems.open(os.path.join(artifact_location,
_ATTRIBUTE_FILE_NAME),
'rb') as f:
- return jsonpickle.decode(f.read())
+ # load_attributes runs eagerly during MLTransform.expand() at pipeline
+ # construction time, so the pipeline's options are available via the
+ # construction-time context.
+ pipeline_options = get_pipeline_options()
+ safe = True
+ if (pipeline_options is not None and
+ pipeline_options.is_compat_version_prior_to("2.75.0")):
+ # Keep the pre-2.75.0 jsonpickle behavior (safe=False permits
+ # eval-based decoding) for backwards compatibility with already-staged
+ # artifacts.
+ safe = False
+ return jsonpickle.decode(f.read(), safe=safe)
Review Comment:

Since `jsonpickle` versions `< 4.0.0` are still supported (as specified in
`setup.py` with `jsonpickle>=3.0.0,<5.0.0`), calling `jsonpickle.decode` with
the `safe` keyword argument will raise a `TypeError` on environments running
older versions of `jsonpickle`. We should conditionally pass the `safe`
argument only if it is supported by the installed version of `jsonpickle`.
```suggestion
pipeline_options = get_pipeline_options()
safe = True
if (pipeline_options is not None and
pipeline_options.is_compat_version_prior_to("2.75.0")):
# Keep the pre-2.75.0 jsonpickle behavior (safe=False permits
# eval-based decoding) for backwards compatibility with already-staged
# artifacts.
safe = False
import inspect
kwargs = {}
if 'safe' in inspect.signature(jsonpickle.decode).parameters:
kwargs['safe'] = safe
return jsonpickle.decode(f.read(), **kwargs)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]