potiuk commented on a change in pull request #22422:
URL: https://github.com/apache/airflow/pull/22422#discussion_r834045820



##########
File path: airflow/providers/databricks/operators/databricks_repos.py
##########
@@ -28,12 +29,131 @@
     from airflow.utils.context import Context
 
 
+class DatabricksReposCreateOperator(BaseOperator):
+    """
+    Creates a Databricks Repo
+    using
+    `POST api/2.0/repos <https://docs.databricks.com/dev-tools/api/latest/repos.html#operation/create-repo>`_
+    API endpoint and optionally checking it out to a specific branch or tag.
+
+    :param git_url: Required HTTPS URL of a Git repository
+    :param git_provider: Optional name of Git provider. Must be provided if we can't guess its name from URL.
+    :param repo_path: optional path for a repository. Must be in the format ``/Repos/{folder}/{repo-name}``.
+        If not specified, it will be created in the user's directory.
+    :param branch: optional name of branch to check out.
+    :param tag: optional name of tag to checkout.
+    :param databricks_conn_id: Reference to the :ref:`Databricks connection <howto/connection:databricks>`.
+        By default and in the common case this will be ``databricks_default``. To use
+        token based authentication, provide the key ``token`` in the extra field for the
+        connection and create the key ``host`` and leave the ``host`` field empty.
+    :param databricks_retry_limit: Amount of times retry if the Databricks backend is
+        unreachable. Its value must be greater than or equal to 1.
+    :param databricks_retry_delay: Number of seconds to wait between retries (it
+            might be a floating point number).
+    """
+
+    # Used in airflow.models.BaseOperator
+    template_fields: Sequence[str] = ('repo_path', 'git_url', 'tag', 'branch')
+
+    __git_providers__ = {
+        "github.com": "gitHub",
+        "dev.azure.com": "azureDevOpsServices",
+        "gitlab.com": "gitLab",
+        "bitbucket.org": "bitbucketCloud",
+    }
+    __aws_code_commit_regexp__ = re.compile(r"^git-codecommit\.[^.]+\.amazonaws.com$")
+    __repos_path_regexp__ = re.compile(r"/Repos/[^/]+/[^/]+/?$")
+
+    def __init__(
+        self,
+        *,
+        git_url: str,
+        git_provider: Optional[str] = None,
+        branch: Optional[str] = None,
+        tag: Optional[str] = None,
+        repo_path: Optional[str] = None,
+        databricks_conn_id: str = 'databricks_default',
+        databricks_retry_limit: int = 3,
+        databricks_retry_delay: int = 1,
+        **kwargs,
+    ) -> None:
+        """Creates a new ``DatabricksReposCreateOperator``."""
+        super().__init__(**kwargs)
+        self.databricks_conn_id = databricks_conn_id
+        self.databricks_retry_limit = databricks_retry_limit
+        self.databricks_retry_delay = databricks_retry_delay
+        self.git_url = git_url
+        if git_provider is None:
+            self.git_provider = self.__detect_repo_provider__(git_url)
+            if self.git_provider is None:
+                raise AirflowException(
+                    "git_provider isn't specified and couldn't be guessed" f" for URL {git_url}"
+                )
+        else:
+            self.git_provider = git_provider
+        if repo_path is not None and not self.__repos_path_regexp__.match(repo_path):
+            raise AirflowException(
+                f"repo_path should have form of /Repos/{{folder}}/{{repo-name}}, got '{repo_path}'"
+            )

Review comment:
       good point @josh-fell !




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to