dstandish commented on a change in pull request #19267:
URL: https://github.com/apache/airflow/pull/19267#discussion_r743320052
##########
File path: tests/serialization/test_dag_serialization.py
##########
@@ -1433,29 +1461,32 @@ def test_serialized_objects_are_sorted(self,
object_to_serialized, expected_outp
assert serialized_obj == expected_output
def test_params_upgrade(self):
+ """when pre-2.2.0 param (i.e. primitive) is deserialized we convert to
Param"""
serialized = {
"__version": 1,
"dag": {
"_dag_id": "simple_dag",
- "fileloc": __file__,
+ "fileloc": '__file__',
Review comment:
this value actually does not have any impact on the test. it could be
`/hello/world.py`
but when you put `__file__` you can't run the bit of code interactively.
```python
serialized = {
"__version": 1,
"dag": {
"_dag_id": "simple_dag",
"fileloc": __file__,
"tasks": [],
"timezone": "UTC",
"params": {"none": None, "str": "str", "dict": {"a": "b"}},
},
}
dag = SerializedDAG.from_dict(serialized)
Traceback (most recent call last):
File
"/Users/dstandish/.virtualenvs/local/lib/python3.8/site-packages/IPython/core/interactiveshell.py",
line 3444, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-4ee74d1eaf6b>", line 5, in <module>
"fileloc": __file__,
NameError: name '__file__' is not defined
```
##########
File path: tests/serialization/test_dag_serialization.py
##########
@@ -733,7 +735,10 @@ def test_dag_params_roundtrip(self, val, expected_val):
dag = DAG(dag_id='simple_dag', params=val)
BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019,
8, 1))
- serialized_dag = SerializedDAG.to_dict(dag)
+ serialized_dag_json = SerializedDAG.to_json(dag)
+
+ serialized_dag = json.loads(serialized_dag_json)
Review comment:
by calling `to_json` we make this test stricter. the full serialization
process not only converts to a dictionary (which is python, thus can handle
arbitrary data types) but also ultimately serializes to json. but the test
previously did not convert to json it only to dict and back. and this is
actually one of the reasons we did not catch the `set` issue, because itt
worked for to_dict but not for to_json.
so here we first go all the way to json before deserilaizing. in other
words, previously the round trip wasn't really a full round trip and that's
what this change fixes.
##########
File path: tests/serialization/test_dag_serialization.py
##########
@@ -733,7 +735,10 @@ def test_dag_params_roundtrip(self, val, expected_val):
dag = DAG(dag_id='simple_dag', params=val)
BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019,
8, 1))
- serialized_dag = SerializedDAG.to_dict(dag)
+ serialized_dag_json = SerializedDAG.to_json(dag)
+
+ serialized_dag = json.loads(serialized_dag_json)
Review comment:
by calling `to_json` we make this test stricter. the full serialization
process must convert not only to a dictionary (which is python, thus can handle
arbitrary data types) but also ultimately must serialize to json.
the test previously did not convert to json; only to dict and back. and
this is actually one of the reasons we did not catch the `set` issue, because
it worked for to_dict but not for to_json.
so here we first go all the way to json before deserilaizing.
in other words, previously the round trip wasn't really a full round trip
and that's what this change fixes.
##########
File path: airflow/models/param.py
##########
@@ -49,18 +50,37 @@ class Param:
"""
__NO_VALUE_SENTINEL = NoValueSentinel()
+ CLASS_IDENTIFIER = '__class'
def __init__(self, default: Any = __NO_VALUE_SENTINEL, description: str =
None, **kwargs):
+
self.value = default
self.description = description
self.schema = kwargs.pop('schema') if 'schema' in kwargs else kwargs
- # If we have a value, validate it once. May raise ValueError.
if self.has_value:
- try:
- jsonschema.validate(self.value, self.schema,
format_checker=FormatChecker())
- except ValidationError as err:
- raise ValueError(err)
+ self._validate(self.value, self.schema)
+
+ @staticmethod
+ def _validate(value, schema):
+ """
+ 1. Check that value is json-serializable; if not, warn. In future
release we will require
+ the value to be json-serializable.
+ 2. Validate ``value`` against ``schema``
+ """
+ try:
+ json.dumps(value)
+ except TypeError:
+ warnings.warn(
+ "The use of non-json-serializable params is deprecated and
will be removed in a"
+ " future release",
+ DeprecationWarning,
+ stacklevel=2,
+ )
Review comment:
ok sure.
##########
File path: airflow/models/param.py
##########
@@ -49,18 +50,37 @@ class Param:
"""
__NO_VALUE_SENTINEL = NoValueSentinel()
+ CLASS_IDENTIFIER = '__class'
def __init__(self, default: Any = __NO_VALUE_SENTINEL, description: str =
None, **kwargs):
+
self.value = default
self.description = description
self.schema = kwargs.pop('schema') if 'schema' in kwargs else kwargs
- # If we have a value, validate it once. May raise ValueError.
if self.has_value:
- try:
- jsonschema.validate(self.value, self.schema,
format_checker=FormatChecker())
- except ValidationError as err:
- raise ValueError(err)
+ self._validate(self.value, self.schema)
+
+ @staticmethod
+ def _validate(value, schema):
+ """
+ 1. Check that value is json-serializable; if not, warn. In future
release we will require
+ the value to be json-serializable.
+ 2. Validate ``value`` against ``schema``
+ """
+ try:
+ json.dumps(value)
+ except TypeError:
+ warnings.warn(
+ "The use of non-json-serializable params is deprecated and
will be removed in a"
+ " future release",
+ DeprecationWarning,
+ stacklevel=2,
+ )
Review comment:
done
##########
File path: tests/serialization/test_dag_serialization.py
##########
@@ -733,7 +735,10 @@ def test_dag_params_roundtrip(self, val, expected_val):
dag = DAG(dag_id='simple_dag', params=val)
BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019,
8, 1))
- serialized_dag = SerializedDAG.to_dict(dag)
+ serialized_dag_json = SerializedDAG.to_json(dag)
+
+ serialized_dag = json.loads(serialized_dag_json)
Review comment:
by calling `to_json` we make this test stricter. the full serialization
process must convert not only to a dictionary (which is python, thus can handle
arbitrary data types) but also ultimately must serialize to json.
the test previously did not convert to json; only to dict and back. so
without making this change, it would not catch the `set` issue.
so here we first go all the way to json before deserilaizing.
in other words, previously the round trip wasn't really a full round trip
and that's what this change fixes.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]