alexott commented on a change in pull request #22422:
URL: https://github.com/apache/airflow/pull/22422#discussion_r834566960
##########
File path: airflow/providers/databricks/operators/databricks_repos.py
##########
@@ -28,12 +29,131 @@
from airflow.utils.context import Context
+class DatabricksReposCreateOperator(BaseOperator):
+ """
+ Creates a Databricks Repo
+ using
+ `POST api/2.0/repos
<https://docs.databricks.com/dev-tools/api/latest/repos.html#operation/create-repo>`_
+ API endpoint and optionally checking it out to a specific branch or tag.
+
+ :param git_url: Required HTTPS URL of a Git repository
+ :param git_provider: Optional name of Git provider. Must be provided if we
can't guess its name from URL.
+ :param repo_path: optional path for a repository. Must be in the format
``/Repos/{folder}/{repo-name}``.
+ If not specified, it will be created in the user's directory.
+ :param branch: optional name of branch to check out.
+ :param tag: optional name of tag to checkout.
+ :param databricks_conn_id: Reference to the :ref:`Databricks connection
<howto/connection:databricks>`.
+ By default and in the common case this will be ``databricks_default``.
To use
+ token based authentication, provide the key ``token`` in the extra
field for the
+ connection and create the key ``host`` and leave the ``host`` field
empty.
+ :param databricks_retry_limit: Amount of times retry if the Databricks
backend is
+ unreachable. Its value must be greater than or equal to 1.
+ :param databricks_retry_delay: Number of seconds to wait between retries
(it
+ might be a floating point number).
+ """
+
+ # Used in airflow.models.BaseOperator
+ template_fields: Sequence[str] = ('repo_path', 'git_url', 'tag', 'branch')
+
+ __git_providers__ = {
+ "github.com": "gitHub",
+ "dev.azure.com": "azureDevOpsServices",
+ "gitlab.com": "gitLab",
+ "bitbucket.org": "bitbucketCloud",
+ }
+ __aws_code_commit_regexp__ =
re.compile(r"^git-codecommit\.[^.]+\.amazonaws.com$")
+ __repos_path_regexp__ = re.compile(r"/Repos/[^/]+/[^/]+/?$")
+
+ def __init__(
+ self,
+ *,
+ git_url: str,
+ git_provider: Optional[str] = None,
+ branch: Optional[str] = None,
+ tag: Optional[str] = None,
+ repo_path: Optional[str] = None,
+ databricks_conn_id: str = 'databricks_default',
+ databricks_retry_limit: int = 3,
+ databricks_retry_delay: int = 1,
+ **kwargs,
+ ) -> None:
+ """Creates a new ``DatabricksReposCreateOperator``."""
+ super().__init__(**kwargs)
+ self.databricks_conn_id = databricks_conn_id
+ self.databricks_retry_limit = databricks_retry_limit
+ self.databricks_retry_delay = databricks_retry_delay
+ self.git_url = git_url
+ if git_provider is None:
+ self.git_provider = self.__detect_repo_provider__(git_url)
+ if self.git_provider is None:
+ raise AirflowException(
+ "git_provider isn't specified and couldn't be guessed" f"
for URL {git_url}"
+ )
+ else:
+ self.git_provider = git_provider
+ if repo_path is not None and not
self.__repos_path_regexp__.match(repo_path):
+ raise AirflowException(
+ f"repo_path should have form of
/Repos/{{folder}}/{{repo-name}}, got '{repo_path}'"
+ )
Review comment:
@potiuk @josh-fell I've addressed review comments...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]