This is an automated email from the ASF dual-hosted git repository.

hez pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 22efe880a feat: Support external repositories (#5322)
22efe880a is described below

commit 22efe880a3e96955769bd1324b05066dcb292f68
Author: Camille Teruel <[email protected]>
AuthorDate: Wed Jun 7 00:39:49 2023 +0200

    feat: Support external repositories (#5322)
    
    Co-authored-by: Camille Teruel <[email protected]>
---
 .../python/plugins/azuredevops/azuredevops/api.py  |  6 ++++++
 .../python/plugins/azuredevops/azuredevops/main.py | 22 +++++++++++++++++++---
 .../plugins/azuredevops/azuredevops/models.py      |  4 ++++
 .../azuredevops/azuredevops/streams/builds.py      |  2 +-
 .../azuredevops/streams/pull_request_commits.py    |  3 +++
 .../azuredevops/streams/pull_requests.py           |  3 +++
 backend/python/pydevlake/pydevlake/plugin.py       | 19 ++++++++++++++-----
 7 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/backend/python/plugins/azuredevops/azuredevops/api.py b/backend/python/plugins/azuredevops/azuredevops/api.py
index 9461d0890..73a35de5a 100644
--- a/backend/python/plugins/azuredevops/azuredevops/api.py
+++ b/backend/python/plugins/azuredevops/azuredevops/api.py
@@ -74,3 +74,9 @@ class AzureDevOpsAPI(API):
 
     def jobs(self, org: str, project: str, build_id: int):
         return self.get(org, project, '_apis/build/builds', build_id, 'timeline')
+
+    def endpoints(self, org: str, project: str):
+        return self.get(org, project, '_apis/serviceendpoint/endpoints')
+
+    def external_repositories(self, org: str, project: str, provider: str, endpoint_id: str):
+        return self.get(org, project, '_apis/sourceProviders', provider, 'repositories', serviceEndpointId=endpoint_id)
diff --git a/backend/python/plugins/azuredevops/azuredevops/main.py b/backend/python/plugins/azuredevops/azuredevops/main.py
index 3a96a1964..28d512572 100644
--- a/backend/python/plugins/azuredevops/azuredevops/main.py
+++ b/backend/python/plugins/azuredevops/azuredevops/main.py
@@ -91,6 +91,21 @@ class AzureDevOpsPlugin(Plugin):
                 repo.parent_repository_url = raw_repo["parentRepository"]["url"]
             yield repo
 
+        for endpoint in api.endpoints(org, proj):
+            provider = endpoint['type']
+            res = api.external_repositories(org, proj, provider, endpoint['id'])
+            for repo in res.json['repositories']:
+                props = repo['properties']
+                yield GitRepository(
+                    id=repo['id'],
+                    name=repo['name'],
+                    project_id=proj,
+                    org_id=org,
+                    provider=provider,
+                    url=props['cloneUrl'],
+                    defaultBranch=props.get('defaultBranch', 'main')
+                )
+
     def test_connection(self, connection: AzureDevOpsConnection):
         api = AzureDevOpsAPI(connection)
         if connection.organization is None:
@@ -107,7 +122,7 @@ class AzureDevOpsPlugin(Plugin):
                 raise Exception(f"Invalid token: {e}")
 
     def extra_tasks(self, scope: GitRepository, tx_rule: AzureDevOpsTransformationRule, entity_types: list[DomainType], connection: AzureDevOpsConnection):
-        if DomainType.CODE in entity_types:
+        if DomainType.CODE in entity_types and not scope.is_external():
             url = urlparse(scope.remote_url)
             url = url._replace(netloc=f'{url.username}:{connection.token.get_secret_value()}@{url.hostname}')
             yield gitextractor(url.geturl(), scope.domain_id(), connection.proxy)
@@ -115,8 +130,9 @@ class AzureDevOpsPlugin(Plugin):
     def extra_stages(self, scope_tx_rule_pairs: list[ScopeTxRulePair], entity_types: list[DomainType], _):
         if DomainType.CODE in entity_types:
             for scope, tx_rule in scope_tx_rule_pairs:
-                options = tx_rule.refdiff if tx_rule else None
-                yield [refdiff(scope.id, options)]
+                if not scope.is_external():
+                    options = tx_rule.refdiff if tx_rule else None
+                    yield [refdiff(scope.id, options)]
 
     @property
     def streams(self):
diff --git a/backend/python/plugins/azuredevops/azuredevops/models.py b/backend/python/plugins/azuredevops/azuredevops/models.py
index d6de95a8e..07c7626b7 100644
--- a/backend/python/plugins/azuredevops/azuredevops/models.py
+++ b/backend/python/plugins/azuredevops/azuredevops/models.py
@@ -44,6 +44,10 @@ class GitRepository(ToolScope, table=True):
     project_id: str
     org_id: str
     parent_repository_url: Optional[str] = Field(source='parentRepository/url')
+    provider: Optional[str]
+
+    def is_external(self):
+        return bool(self.provider)
 
 
 class GitPullRequest(ToolModel, table=True):
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
index ad349bc67..f9e6a37cb 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
@@ -31,7 +31,7 @@ class Builds(Stream):
     def collect(self, state, context) -> Iterable[tuple[object, dict]]:
         repo: GitRepository = context.scope
         api = AzureDevOpsAPI(context.connection)
-        response = api.builds(repo.org_id, repo.project_id, repo.id, 'tfsgit')
+        response = api.builds(repo.org_id, repo.project_id, repo.id, repo.provider)
         for raw_build in response:
             yield raw_build, state
 
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py b/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py
index a7c42ab0e..c06e77309 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py
@@ -27,6 +27,9 @@ class GitPullRequestCommits(Substream):
     domain_types = [DomainType.CODE]
     parent_stream = GitPullRequests
 
+    def should_run_on(self, scope: GitRepository) -> bool:
+        return not scope.is_external()
+
     def collect(self, state, context, parent: GitPullRequest) -> Iterable[tuple[object, dict]]:
         repo: GitRepository = context.scope
         azuredevops_api = AzureDevOpsAPI(context.connection)
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py b/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py
index 2d32ff091..6922d1b7e 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py
@@ -25,6 +25,9 @@ class GitPullRequests(Stream):
     tool_model = GitPullRequest
     domain_types = [DomainType.CODE]
 
+    def should_run_on(self, scope: GitRepository) -> bool:
+        return not scope.is_external()
+
     def collect(self, state, context) -> Iterable[tuple[object, dict]]:
         api = AzureDevOpsAPI(context.connection)
         repo: GitRepository = context.scope
diff --git a/backend/python/pydevlake/pydevlake/plugin.py b/backend/python/pydevlake/pydevlake/plugin.py
index e94791895..fecdc0f11 100644
--- a/backend/python/pydevlake/pydevlake/plugin.py
+++ b/backend/python/pydevlake/pydevlake/plugin.py
@@ -23,6 +23,7 @@ import fire
 
 import pydevlake.message as msg
 from pydevlake.subtasks import Subtask
+from pydevlake.logger import logger
 from pydevlake.ipc import PluginCommands
 from pydevlake.context import Context
 from pydevlake.stream import Stream, DomainType
@@ -95,13 +96,20 @@ class Plugin(ABC):
         pass
 
     def collect(self, ctx: Context, stream: str):
-        yield from self.get_stream(stream).collector.run(ctx)
+        return self._run_stream(ctx, stream, 'collector')
 
     def extract(self, ctx: Context, stream: str):
-        yield from self.get_stream(stream).extractor.run(ctx)
+        return self._run_stream(ctx, stream, 'extractor')
 
     def convert(self, ctx: Context, stream: str):
-        yield from self.get_stream(stream).convertor.run(ctx)
+        return self._run_stream(ctx, stream, 'convertor')
+
+    def _run_stream(self, ctx: Context, stream_name: str, subtask: str):
+        stream = self.get_stream(stream_name)
+        if stream.should_run_on(ctx.scope):
+            yield from getattr(stream, subtask).run(ctx)
+        else:
+            logger.info(f"Skipping stream {stream.name} for scope {ctx.scope.name}")
 
     def make_remote_scopes(self, connection: Connection, group_id: Optional[str] = None) -> msg.RemoteScopes:
         if group_id:
@@ -125,6 +133,7 @@ class Plugin(ABC):
         """
         Make a simple pipeline using the scopes declared by the plugin.
         """
+        entity_types = entity_types or list(DomainType)
         plan = self.make_pipeline_plan(scope_tx_rule_pairs, entity_types, connection)
         domain_scopes = []
         for tool_scope, _ in scope_tx_rule_pairs:
@@ -194,7 +203,7 @@ class Plugin(ABC):
                     subtasks.append(subtask.name)
         return subtasks
 
-    def get_stream(self, stream_name: str):
+    def get_stream(self, stream_name: str) -> Stream:
         stream = self._streams.get(stream_name)
         if stream is None:
             raise Exception(f'Unknown stream {stream_name}')
@@ -207,7 +216,7 @@ class Plugin(ABC):
                 entry_point_name=subtask.verb,
                 arguments=[subtask.stream.name],
                 required=True,
-                enabled_by_default=True,
+                enabled_by_default=False,
                 description=subtask.description,
                 domain_types=[dm.value for dm in subtask.stream.domain_types]
             )

Reply via email to