ashb commented on a change in pull request #5743: [AIRFLOW-5088][AIP-24] 
Persisting serialized DAG in DB for webserver scalability
URL: https://github.com/apache/airflow/pull/5743#discussion_r334438983
 
 

 ##########
 File path: airflow/dag/serialization/schema.json
 ##########
 @@ -0,0 +1,196 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://airflow.apache.com/schemas/serialized-dags.json",
+  "definitions": {
+    "datetime": {
+      "description": "A date time, stored as fractional seconds since the epoch",
+      "type": "number"
+    },
+    "typed_datetime": {
+      "type": "object",
+      "properties": {
+        "__type": {
+          "type": "string",
+          "const": "datetime"
+        },
+        "__var": { "$ref": "#/definitions/datetime" }
+      },
+      "required": [
+        "__type",
+        "__var"
+      ],
+      "additionalProperties": false
+    },
+    "timedelta": {
+      "type": "number",
+      "minimum": 0
+    },
+    "typed_timedelta": {
+      "type": "object",
+      "properties": {
+        "__type": {
+          "type": "string",
+          "const": "timedelta"
+        },
+        "__var": { "$ref": "#/definitions/timedelta" }
+      },
+      "required": [
+        "__type",
+        "__var"
+      ],
+      "additionalProperties": false
+    },
+    "typed_relativedelta": {
+      "type": "object",
+      "description": "A dateutil.relativedelta.relativedelta object",
+      "properties": {
+        "__type": {
+          "type": "string",
+          "const": "relativedelta"
+        },
+        "__var": {
+          "type": "object",
+          "properties": {
+            "weekday": {
+              "type": "array",
+              "items": { "type": "integer" },
+              "minItems": 1,
+              "maxItems": 2
+            }
+          },
+          "additionalProperties": { "type": "integer" }
+        }
+      }
+    },
+    "timezone": {
+      "type": "string"
+    },
+    "dict": {
+      "description": "A python dictionary containing values of any type",
+      "type": "object"
+    },
+    "color": {
+      "type": "string",
+      "pattern": "^#[a-fA-F0-9]{3,6}$"
+    },
+    "stringset": {
+      "description": "A set of strings",
+      "type": "object",
+      "properties": {
+        "__type": {
+          "type": "string",
+          "const": "set"
+        },
+        "__var": {
+          "type": "array",
+          "items": { "type": "string" }
+        }
+      },
+      "required": [
+        "__type",
+        "__var"
+      ]
+    },
+    "dag": {
+      "type": "object",
+      "properties": {
+        "params": { "$ref": "#/definitions/dict" },
+        "_dag_id": { "type": "string" },
+        "task_dict": {  "$ref": "#/definitions/task_dict" },
+        "timezone": { "$ref": "#/definitions/timezone" },
+        "schedule_interval": {
+          "anyOf": [
+            { "type": "null" },
+            { "type": "string" },
+            { "$ref": "#/definitions/typed_timedelta" },
+            { "$ref": "#/definitions/typed_relativedelta" }
+          ]
+        },
+        "catchup": { "type": "boolean" },
+        "is_subdag": { "type": "boolean" },
+        "fileloc": { "type" : "string"},
+        "orientation": { "type" : "string"},
+        "_description": { "type" : "string"},
+        "_concurrency": { "type" : "number"},
+        "max_active_runs": { "type" : "number"},
+        "default_args": { "$ref": "#/definitions/dict" },
+        "start_date": { "$ref": "#/definitions/datetime" },
+        "dagrun_timeout": { "$ref": "#/definitions/timedelta" },
+        "doc_md": { "type" : "string"}
+      },
+      "required": [
+        "params",
+        "_dag_id",
+        "fileloc",
+        "task_dict"
+      ],
+      "additionalProperties": false
+    },
+    "task_dict": {
+      "type": "object",
+      "additionalProperties": { "$ref": "#/definitions/operator" }
+    },
+    "operator": {
+      "$comment": "A task/operator in a DAG",
+      "type": "object",
+      "required": [
+        "_task_type",
+        "_task_module",
+        "task_id",
+        "ui_color",
+        "ui_fgcolor",
+        "template_fields"
+      ],
+      "properties": {
+        "_task_type": { "type": "string" },
+        "_task_module": { "type": "string" },
+        "task_id": { "type": "string" },
+        "owner": { "type": "string" },
+        "start_date": { "$ref": "#/definitions/datetime" },
+        "end_date": { "$ref": "#/definitions/datetime" },
+        "trigger_rule": { "type": "string" },
+        "depends_on_past": { "type": "boolean" },
+        "wait_for_downstream": { "type": "boolean" },
+        "retries": { "type": "number" },
+        "queue": { "type": "string" },
+        "pool": { "type": "string" },
+        "execution_timeout": { "$ref": "#/definitions/timedelta" },
+        "retry_delay": { "$ref": "#/definitions/timedelta" },
+        "retry_exponential_backoff": { "type": "boolean" },
+        "params": { "$ref": "#/definitions/dict" },
+        "priority_weight": { "type": "number" },
+        "weight_rule": { "type": "string" },
+        "executor_config": { "$ref": "#/definitions/dict" },
+        "do_xcom_push": { "type": "boolean" },
+        "ui_color": { "$ref": "#/definitions/color" },
+        "ui_fgcolor": { "$ref": "#/definitions/color" },
+        "template_fields": {
 
 Review comment:
   Oh duh, I forgot about that :)
   
   Mostly because the extra "type decoration" isn't needed anymore and we've 
removed it in cases where it is unambiguous to the deserializer. (For the huge 
DAGs we've been testing with, the parsing time starts to have a non-trivial 
impact on performance.) Since `stringset` isn't used anymore, we should probably 
remove that definition instead.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to