ashb commented on a change in pull request #5743: [AIRFLOW-5088][AIP-24]
Persisting serialized DAG in DB for webserver scalability
URL: https://github.com/apache/airflow/pull/5743#discussion_r334438983
##########
File path: airflow/dag/serialization/schema.json
##########
@@ -0,0 +1,196 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "$id": "https://airflow.apache.com/schemas/serialized-dags.json",
+ "definitions": {
+ "datetime": {
+ "description": "A date time, stored as fractional seconds since the
epoch",
+ "type": "number"
+ },
+ "typed_datetime": {
+ "type": "object",
+ "properties": {
+ "__type": {
+ "type": "string",
+ "const": "datetime"
+ },
+ "__var": { "$ref": "#/definitions/datetime" }
+ },
+ "required": [
+ "__type",
+ "__var"
+ ],
+ "additionalProperties": false
+ },
+ "timedelta": {
+ "type": "number",
+ "minimum": 0
+ },
+ "typed_timedelta": {
+ "type": "object",
+ "properties": {
+ "__type": {
+ "type": "string",
+ "const": "timedelta"
+ },
+ "__var": { "$ref": "#/definitions/timedelta" }
+ },
+ "required": [
+ "__type",
+ "__var"
+ ],
+ "additionalProperties": false
+ },
+ "typed_relativedelta": {
+ "type": "object",
+ "description": "A dateutil.relativedelta.relativedelta object",
+ "properties": {
+ "__type": {
+ "type": "string",
+ "const": "relativedelta"
+ },
+ "__var": {
+ "type": "object",
+ "properties": {
+ "weekday": {
+ "type": "array",
+ "items": { "type": "integer" },
+ "minItems": 1,
+ "maxItems": 2
+ }
+ },
+ "additionalProperties": { "type": "integer" }
+ }
+ }
+ },
+ "timezone": {
+ "type": "string"
+ },
+ "dict": {
+ "description": "A python dictionary containing values of any type",
+ "type": "object"
+ },
+ "color": {
+ "type": "string",
+ "pattern": "^#[a-fA-F0-9]{3,6}$"
+ },
+ "stringset": {
+ "description": "A set of strings",
+ "type": "object",
+ "properties": {
+ "__type": {
+ "type": "string",
+ "const": "set"
+ },
+ "__var": {
+ "type": "array",
+ "items": { "type": "string" }
+ }
+ },
+ "required": [
+ "__type",
+ "__var"
+ ]
+ },
+ "dag": {
+ "type": "object",
+ "properties": {
+ "params": { "$ref": "#/definitions/dict" },
+ "_dag_id": { "type": "string" },
+ "task_dict": { "$ref": "#/definitions/task_dict" },
+ "timezone": { "$ref": "#/definitions/timezone" },
+ "schedule_interval": {
+ "anyOf": [
+ { "type": "null" },
+ { "type": "string" },
+ { "$ref": "#/definitions/typed_timedelta" },
+ { "$ref": "#/definitions/typed_relativedelta" }
+ ]
+ },
+ "catchup": { "type": "boolean" },
+ "is_subdag": { "type": "boolean" },
+ "fileloc": { "type" : "string"},
+ "orientation": { "type" : "string"},
+ "_description": { "type" : "string"},
+ "_concurrency": { "type" : "number"},
+ "max_active_runs": { "type" : "number"},
+ "default_args": { "$ref": "#/definitions/dict" },
+ "start_date": { "$ref": "#/definitions/datetime" },
+ "dagrun_timeout": { "$ref": "#/definitions/timedelta" },
+ "doc_md": { "type" : "string"}
+ },
+ "required": [
+ "params",
+ "_dag_id",
+ "fileloc",
+ "task_dict"
+ ],
+ "additionalProperties": false
+ },
+ "task_dict": {
+ "type": "object",
+ "additionalProperties": { "$ref": "#/definitions/operator" }
+ },
+ "operator": {
+ "$comment": "A task/operator in a DAG",
+ "type": "object",
+ "required": [
+ "_task_type",
+ "_task_module",
+ "task_id",
+ "ui_color",
+ "ui_fgcolor",
+ "template_fields"
+ ],
+ "properties": {
+ "_task_type": { "type": "string" },
+ "_task_module": { "type": "string" },
+ "task_id": { "type": "string" },
+ "owner": { "type": "string" },
+ "start_date": { "$ref": "#/definitions/datetime" },
+ "end_date": { "$ref": "#/definitions/datetime" },
+ "trigger_rule": { "type": "string" },
+ "depends_on_past": { "type": "boolean" },
+ "wait_for_downstream": { "type": "boolean" },
+ "retries": { "type": "number" },
+ "queue": { "type": "string" },
+ "pool": { "type": "string" },
+ "execution_timeout": { "$ref": "#/definitions/timedelta" },
+ "retry_delay": { "$ref": "#/definitions/timedelta" },
+ "retry_exponential_backoff": { "type": "boolean" },
+ "params": { "$ref": "#/definitions/dict" },
+ "priority_weight": { "type": "number" },
+ "weight_rule": { "type": "string" },
+ "executor_config": { "$ref": "#/definitions/dict" },
+ "do_xcom_push": { "type": "boolean" },
+ "ui_color": { "$ref": "#/definitions/color" },
+ "ui_fgcolor": { "$ref": "#/definitions/color" },
+ "template_fields": {
Review comment:
Oh duh, I forgot about that :)
Mostly because the extra "type decoration" isn't needed anymore and we've
removed it in cases where it is unambiguous to the deserializer. (For the huge
DAGs we've been testing with, the parsing time starts to have a non-trivial
impact on performance.) Since `stringset` isn't used anymore, we should probably
remove that definition instead.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services