kyle-winkelman commented on issue #29733:
URL: https://github.com/apache/airflow/issues/29733#issuecomment-1442445629
I have come up with a workaround that uses the SimpleHttpOperator to
directly hit the `api/2.1/jobs/create` (or `api/2.1/jobs/reset`) endpoint and
then passes the resulting `job_id` via XCom to the DatabricksRunNowOperator.
```python
from datetime import datetime

from airflow import DAG
from airflow.models import Variable
from airflow.providers.databricks.operators.databricks import (
    DatabricksRunNowOperator,
)
from airflow.providers.http.operators.http import SimpleHttpOperator
from airflow.utils.task_group import TaskGroup
with DAG(
    dag_id='CREATE_OR_RESET_THEN_RUN_NOW',
    schedule_interval='@hourly',
    start_date=datetime(2020, 1, 1),
    catchup=False,
) as dag:

    def databricks_jobs_operator(group_id, job, conn_id='databricks_default'):
        """Build a task group that creates or resets a Databricks job, then runs it.

        The group holds two tasks:

        * ``create_or_reset`` POSTs to ``api/2.1/jobs/create`` when no job id
          has been stored in XCom by an earlier run of this task, and to
          ``api/2.1/jobs/reset`` otherwise. It stores the job id in XCom.
        * ``run_now`` reads that job id from XCom and triggers the job.

        Args:
            group_id: Id of the task group; also used as the prefix of the
                task id that the XCom lookups refer to.
            job: Job settings. The value is interpolated into a Jinja
                expression via its ``repr``, so it has to be something whose
                repr is a valid Jinja literal (e.g. plain dicts/lists/strings
                /numbers, as in the example below).
            conn_id: Connection id used for both the HTTP call and the
                Databricks run-now call.

        Returns:
            The ``TaskGroup`` containing both tasks.
        """
        # Full id of the create/reset task, as referenced by every XCom lookup.
        create_task_id = f"{group_id}.create_or_reset"
        # Jinja expression yielding the job id stored by an earlier run, if any.
        prior_job_id = (
            f"ti.xcom_pull(task_ids='{create_task_id}', include_prior_dates=True)"
        )

        def keep_or_extract_job_id(response, ti):
            """Return the already-known job id, or the id of the newly created job.

            The parameter names are kept exactly as in the original lambda
            (``response``, ``ti``); presumably the operator supplies the task
            instance by that name -- verify against the HTTP provider docs.
            """
            # Pull once instead of twice; the value is the same for both uses.
            known_job_id = ti.xcom_pull(
                task_ids=create_task_id, include_prior_dates=True
            )
            if known_job_id is None:
                # First run: the response of the create call carries the id.
                return response.json()['job_id']
            # Later runs (reset): keep propagating the id we already have.
            return known_job_id

        with TaskGroup(group_id=group_id) as tg:
            jobs_create_or_reset = SimpleHttpOperator(
                task_id='create_or_reset',
                http_conn_id=conn_id,
                # Quadruple braces render to the literal ``{{``/``}}`` Jinja
                # delimiters; the choice of endpoint happens at render time.
                endpoint=(
                    f"{{{{ 'api/2.1/jobs/create' if {prior_job_id} is none"
                    f" else 'api/2.1/jobs/reset' }}}}"
                ),
                method='POST',
                # Create sends the bare settings; reset wraps them together
                # with the existing job id under 'new_settings'.
                data=(
                    f"{{{{ ({job} if {prior_job_id} is none"
                    f" else {{'job_id': {prior_job_id}, 'new_settings': {job}}})"
                    f" | tojson }}}}"
                ),
                headers={'Content-Type': 'application/json'},
                response_filter=keep_or_extract_job_id,
            )
            jobs_run_now = DatabricksRunNowOperator(
                task_id='run_now',
                job_id=f"{{{{ ti.xcom_pull(task_ids='{create_task_id}') }}}}",
                databricks_conn_id=conn_id,
            )
            jobs_create_or_reset >> jobs_run_now
        return tg

    # Example job settings: two notebook tasks sharing one job cluster,
    # with 'test2' depending on 'test1'.
    job = {
        'name': 'test',
        'tasks': [
            {
                'task_key': 'test1',
                'notebook_task': {
                    'notebook_path': '/Users/[email protected]/test2'
                },
                'job_cluster_key': 'job_cluster'
            },
            {
                'task_key': 'test2',
                'depends_on': [
                    {
                        'task_key': 'test1'
                    }
                ],
                'notebook_task': {
                    'notebook_path': '/Users/[email protected]/test2'
                },
                'job_cluster_key': 'job_cluster'
            }
        ],
        'job_clusters': [
            {
                'job_cluster_key': 'job_cluster',
                'new_cluster': {
                    'spark_version': '10.4.x-scala2.12',
                    'node_type_id': 'Standard_D3_v2',
                    'num_workers': 2,
                }
            }
        ]
    }

    # Must be called inside the ``with DAG`` block, as in the original snippet.
    test = databricks_jobs_operator(group_id='test', job=job)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]