josh-fell commented on a change in pull request #20998: URL: https://github.com/apache/airflow/pull/20998#discussion_r790381818
########## File path: airflow/providers/dbt/cloud/hooks/dbt.py ########## @@ -0,0 +1,488 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import sys +import time +from enum import Enum +from functools import wraps +from inspect import signature +from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union + +from requests import PreparedRequest, Session +from requests.auth import AuthBase +from requests.models import Response + +from airflow.exceptions import AirflowException +from airflow.models import Connection +from airflow.providers.http.hooks.http import HttpHook +from airflow.typing_compat import TypedDict + +if sys.version_info >= (3, 8): + from functools import cached_property +else: + from cached_property import cached_property + + +def fallback_to_default_account(func: Callable) -> Callable: + """ + Decorator which provides a fallback value for ``account_id``. If the ``account_id`` is None or not passed + to the decorated function, the value will be taken from the configured dbt Cloud Airflow Connection. 
+ """ + sig = signature(func) + + @wraps(func) + def wrapper(*args, **kwargs) -> Callable: + bound_args = sig.bind(*args, **kwargs) + + # Check if ``account_id`` was not included in the function signature or, if it is, the value is not + # provided. + if bound_args.arguments.get("account_id") is None: + self = args[0] + default_account_id = self.conn.login + if not default_account_id: + raise AirflowException("Could not determine the dbt Cloud account.") + + bound_args.arguments["account_id"] = int(default_account_id) + + return func(*bound_args.args, **bound_args.kwargs) + + return wrapper + + +class TokenAuth(AuthBase): + """Helper class for Auth when executing requests.""" + + def __init__(self, token: str) -> None: + self.token = token + + def __call__(self, request: PreparedRequest) -> PreparedRequest: + request.headers["Content-Type"] = "application/json" + request.headers["Authorization"] = f"Token {self.token}" + + return request + + +class JobRunInfo(TypedDict): + """Type class for the ``job_run_info`` dictionary.""" + + account_id: int + run_id: int + + +class DbtCloudJobRunStatus(Enum): + """dbt Cloud Job statuses.""" + + QUEUED = 1 + STARTING = 2 + RUNNING = 3 + SUCCESS = 10 + ERROR = 20 + CANCELLED = 30 + TERMINAL_STATUSES = (SUCCESS, ERROR, CANCELLED) + + @classmethod + def check_is_valid(cls, statuses: Union[int, Sequence[int], Set[int]]): + """Validates input statuses are a known value.""" + if isinstance(statuses, (Sequence, Set)): + for status in statuses: + cls(status) + else: + cls(statuses) + + @classmethod + def is_terminal(cls, status: int) -> bool: + """Checks if the input status is that of a terminal type.""" + cls.check_is_valid(statuses=status) + + return status in cls.TERMINAL_STATUSES.value + + +class DbtCloudJobRunException(AirflowException): + """An exception that indicates a job run failed to complete.""" + + +class DbtCloudHook(HttpHook): + """ + Interact with dbt Cloud using the V2 API. 
+ + :param dbt_cloud_conn_id: The ID of the :ref:`dbt Cloud connection <howto/connection:dbt-cloud>`. + """ + + conn_name_attr = "dbt_cloud_conn_id" + default_conn_name = "dbt_cloud_default" + conn_type = "dbt_cloud" + hook_name = "dbt Cloud" + + @staticmethod + def get_ui_field_behaviour() -> Dict[str, Any]: + """Builds custom field behavior for the dbt Cloud connection form in the Airflow UI.""" + return { + "hidden_fields": ["host", "port", "schema", "extra"], + "relabeling": {"login": "Account ID", "password": "API Token"}, + } + + def __init__(self, dbt_cloud_conn_id: str = default_conn_name, *args, **kwargs) -> None: + super().__init__(auth_type=TokenAuth) + self.dbt_cloud_conn_id = dbt_cloud_conn_id + self.base_url = "https://cloud.getdbt.com/api/v2/accounts/" + + @cached_property + def conn(self) -> Connection: + _conn = self.get_connection(self.dbt_cloud_conn_id) + if not _conn.password: + raise AirflowException("An API token is required to connect to dbt Cloud.") + + return _conn + + def get_conn(self, *args, **kwargs) -> Session: + session = Session() + session.auth = self.auth_type(self.conn.password) + + return session + + def _paginate(self, endpoint: str, payload: Optional[Dict[str, Any]] = None) -> List[Response]: + results = [] + response = self.run(endpoint=endpoint, data=payload) + resp_json = response.json() + limit = resp_json["extra"]["filters"]["limit"] + num_total_results = resp_json["extra"]["pagination"]["total_count"] + num_current_results = resp_json["extra"]["pagination"]["count"] + results.append(response) + + if not num_current_results == num_total_results: + _paginate_payload = payload.copy() if payload else {} + _paginate_payload["offset"] = limit + + while True: + if num_current_results < num_total_results: + response = self.run(endpoint=endpoint, data=_paginate_payload) + resp_json = response.json() + if resp_json["data"]: + results.append(response) + num_current_results += resp_json["extra"]["pagination"]["count"] + 
_paginate_payload["offset"] += limit + else: + break + + return results + + def _run_and_get_response( + self, + method: str = "GET", + endpoint: Optional[str] = None, + payload: Union[str, Dict[str, Any], None] = None, + paginate: bool = False, + ) -> Any: + self.method = method + + if paginate: + if isinstance(payload, str): + raise ValueError("Payload cannot be a string to paginate a response.") + + if endpoint: + return self._paginate(endpoint=endpoint, payload=payload) + else: + raise ValueError("An endpoint is needed to paginate a response.") + + return self.run(endpoint=endpoint, data=payload) + + def list_accounts(self) -> List[Response]: + """ + Retrieves all of the dbt Cloud accounts the configured API token is authorized to access. + + :return: List of request responses. + """ + return self._run_and_get_response() + + @fallback_to_default_account + def get_account(self, account_id: Optional[int] = None) -> Response: + """ + Retrieves metadata for a specific dbt Cloud account. + + :param account_id: Optional. The ID of a dbt Cloud account. + :return: The request response. + """ + return self._run_and_get_response(endpoint=f"{account_id}/") + + @fallback_to_default_account + def list_projects(self, account_id: Optional[int] = None) -> List[Response]: + """ + Retrieves metadata for all projects tied to a specified dbt Cloud account. + + :param account_id: Optional. The ID of a dbt Cloud account. + :return: List of request responses. + """ + return self._run_and_get_response(endpoint=f"{account_id}/projects/", paginate=True) + + @fallback_to_default_account + def get_project(self, project_id: int, account_id: Optional[int] = None) -> Response: + """ + Retrieves metadata for a specific project. + + :param project_id: The ID of a dbt Cloud project. + :param account_id: Optional. The ID of a dbt Cloud account. + :return: The request response. 
+ """ + return self._run_and_get_response(endpoint=f"{account_id}/projects/{project_id}/") + + @fallback_to_default_account + def list_jobs( + self, + account_id: Optional[int] = None, + order_by: Optional[str] = None, + project_id: Optional[int] = None, + ) -> List[Response]: + """ + Retrieves metadata for all jobs tied to a specified dbt Cloud account. If a ``project_id`` is + supplied, only jobs pertaining to this job will be retrieved. + + :param account_id: Optional. The ID of a dbt Cloud account. + :param order_by: Optional. Field to order the result by. Use '-' to indicate reverse order. + For example, to use reverse order by the run ID use ``order_by=-id``. + :param project_id: The ID of a dbt Cloud project. + :return: List of request responses. + """ + return self._run_and_get_response( + endpoint=f"{account_id}/jobs/", + payload={"order_by": order_by, "project_id": project_id}, + paginate=True, + ) + + @fallback_to_default_account + def get_job(self, job_id: int, account_id: Optional[int] = None) -> Response: + """ + Retrieves metadata for a specific job. + + :param job_id: The ID of a dbt Cloud job. + :param account_id: Optional. The ID of a dbt Cloud account. + :return: The request response. + """ + return self._run_and_get_response(endpoint=f"{account_id}/jobs/{job_id}") + + @fallback_to_default_account + def trigger_job_run( + self, + job_id: int, + cause: str, + account_id: Optional[int] = None, + steps_override: Optional[List[str]] = None, + schema_override: Optional[str] = None, + additional_run_config: Optional[Dict[str, Any]] = None, + ) -> Response: + """ + Triggers a run of a dbt Cloud job. + + :param job_id: The ID of a dbt Cloud job. + :param cause: Description of the reason to trigger the job. + :param account_id: Optional. The ID of a dbt Cloud account. + :param steps_override: Optional. List of dbt commands to execute when triggering the job + instead of those configured in dbt Cloud. + :param schema_override: Optional. 
Override the destination schema in the configured target for this + job. + :param additional_run_config: Optional. Any additional parameters that should be included in the API + request when triggering the job. + :return: The request response. + """ + if additional_run_config is None: + additional_run_config = {} + + payload = { + "cause": cause, + "steps_override": steps_override, + "schema_override": schema_override, + } + payload.update(additional_run_config) + + return self._run_and_get_response( + method="POST", + endpoint=f"{account_id}/jobs/{job_id}/run/", + payload=json.dumps(payload), + ) + + @fallback_to_default_account + def list_job_runs( + self, + account_id: Optional[int] = None, + include_related: Optional[List[str]] = None, + job_definition_id: Optional[int] = None, + order_by: Optional[str] = None, + ) -> List[Response]: + """ + Retrieves metadata for all of the dbt Cloud job runs for an account. If a ``job_definition_id`` is + supplied, only metadata for runs of that specific job are pulled. + + :param account_id: Optional. The ID of a dbt Cloud account. + :param include_related: Optional. List of related fields to pull with the run. + Valid values are "trigger", "job", "repository", and "environment". + :param job_definition_id: Optional. The dbt Cloud job ID to retrieve run metadata. + :param order_by: Optional. Field to order the result by. Use '-' to indicate reverse order. + For example, to use reverse order by the run ID use ``order_by=-id``. + :return: List of request responses. + """ + return self._run_and_get_response( + endpoint=f"{account_id}/runs/", + payload={ + "include_related": include_related, + "job_definition_id": job_definition_id, + "order_by": order_by, + }, + paginate=True, + ) + + @fallback_to_default_account + def get_job_run( + self, run_id: int, account_id: Optional[int] = None, include_related: Optional[List[str]] = None + ) -> Response: + """ + Retrieves metadata for a specific run of a dbt Cloud job. 
+ + :param run_id: The ID of a dbt Cloud job run. + :param account_id: Optional. The ID of a dbt Cloud account. + :param include_related: Optional. List of related fields to pull with the run. + Valid values are "trigger", "job", "repository", and "environment". + :return: The request response. + """ + return self._run_and_get_response( + endpoint=f"{account_id}/runs/{run_id}/", Review comment: This should be caught by the `@fallback_to_default_account` decorator, which wraps every hook method that calls a dbt Cloud API endpoint directly. The decorator checks whether `account_id` was not passed or, if it was passed, whether its value is `None`. If either of those conditions is met, the `account_id` set in the Airflow connection is used. In the event there is no `account_id` configured in the connection, this exception is raised: `raise AirflowException("Could not determine the dbt Cloud account.")`. Does that suffice? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
