This is an automated email from the ASF dual-hosted git repository.

lynwee pushed a commit to branch lw-fix-azure-0305
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git

commit 50a581ff392a5a78f46b133f600056ad6ad52a49
Author: Lynwee Hou <[email protected]>
AuthorDate: Tue Mar 5 16:05:53 2024 +0800

    fix(azuredevops): fix some bugs
---
 backend/plugins/gitextractor/parser/clone.go        |  6 +++++-
 .../python/plugins/azuredevops/azuredevops/main.py  |  2 +-
 .../azuredevops/azuredevops/streams/builds.py       |  8 ++++++++
 .../plugins/azuredevops/azuredevops/streams/jobs.py |  6 ++++++
 .../azuredevops/streams/pull_request_commits.py     | 12 ++++++++++--
 .../azuredevops/streams/pull_requests.py            |  6 ++++++
 backend/python/pydevlake/pydevlake/api.py           |  7 +++++++
 backend/python/pydevlake/pydevlake/subtasks.py      | 21 +++++++++++++++------
 8 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/backend/plugins/gitextractor/parser/clone.go 
b/backend/plugins/gitextractor/parser/clone.go
index f8739e731..809aeaafd 100644
--- a/backend/plugins/gitextractor/parser/clone.go
+++ b/backend/plugins/gitextractor/parser/clone.go
@@ -122,7 +122,11 @@ func (l *GitRepoCreator) cloneOverHTTP(ctx 
plugin.SubTaskContext, withGoGit bool
                done <- struct{}{}
                if err != nil {
                        // Some sensitive information such as password will be 
released in this err.
-                       err = fmt.Errorf("plain clone git error")
+                       if err.Error() == "repository not found" {
+                               // do nothing, it's a safe error message.
+                       } else {
+                               err = fmt.Errorf("plain clone git error")
+                       }
                        l.logger.Error(err, "PlainCloneContext")
                        return nil, err
                }
diff --git a/backend/python/plugins/azuredevops/azuredevops/main.py 
b/backend/python/plugins/azuredevops/azuredevops/main.py
index d31b53b3d..aab9b0061 100644
--- a/backend/python/plugins/azuredevops/azuredevops/main.py
+++ b/backend/python/plugins/azuredevops/azuredevops/main.py
@@ -120,7 +120,7 @@ class AzureDevOpsPlugin(Plugin):
         hint = None
         try:
             if connection.organization is None:
-                hint = "You may need to edit your token to set organization to 
'All accessible organizations"
+                hint = "You may need to edit your token to set organization to 
'All accessible organizations'"
                 res = api.my_profile()
             else:
                 hint = "Organization name may be incorrect or your token may 
not have access to the organization."
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py 
b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
index ef4717759..af2fdf252 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
@@ -30,8 +30,16 @@ class Builds(Stream):
     def collect(self, state, context) -> Iterable[tuple[object, dict]]:
         repo: GitRepository = context.scope
         api = AzureDevOpsAPI(context.connection)
+        provider = repo.provider or 'tfsgit'
         response = api.builds(repo.org_id, repo.project_id, repo.id, 
repo.provider or 'tfsgit')
         for raw_build in response:
+            raw_build["x_request_url"] = response.get_url_with_query_string()
+            raw_build["x_request_input"] = {
+                "OrgId": repo.org_id,
+                "ProjectId": repo.project_id,
+                "RepoId": repo.id,
+                "Provider": provider,
+            }
             yield raw_build, state
 
     def convert(self, b: Build, ctx: Context):
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py 
b/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
index 9c625c5f3..7d2cd99fe 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
@@ -50,6 +50,12 @@ class Jobs(Substream):
         for raw_job in response.json["records"]:
             if raw_job["type"] == "Job":
                 raw_job["build_id"] = parent.domain_id()
+                raw_job["x_request_url"] = response.get_url_with_query_string()
+                raw_job["x_request_input"] = {
+                    "OrgId": repo.org_id,
+                    "ProjectId": repo.project_id,
+                    "BuildId": parent.id,
+                }
                 yield raw_job, state
 
     def convert(self, j: Job, ctx: Context) -> Iterable[devops.CICDPipeline]:
diff --git 
a/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py
 
b/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py
index bd5ee0be8..43df25c28 100644
--- 
a/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py
+++ 
b/backend/python/plugins/azuredevops/azuredevops/streams/pull_request_commits.py
@@ -15,11 +15,11 @@
 
 from typing import Iterable
 
+import pydevlake.domain_layer.code as code
 from azuredevops.api import AzureDevOpsAPI
 from azuredevops.models import GitPullRequest, GitPullRequestCommit, 
GitRepository
 from azuredevops.streams.pull_requests import GitPullRequests
 from pydevlake import Substream, DomainType
-import pydevlake.domain_layer.code as code
 
 
 class GitPullRequestCommits(Substream):
@@ -34,9 +34,17 @@ class GitPullRequestCommits(Substream):
     def collect(self, state, context, parent: GitPullRequest) -> 
Iterable[tuple[object, dict]]:
         repo: GitRepository = context.scope
         azuredevops_api = AzureDevOpsAPI(context.connection)
-        response = azuredevops_api.git_repo_pull_request_commits(repo.org_id, 
repo.project_id, repo.id, parent.pull_request_id)
+        response = azuredevops_api.git_repo_pull_request_commits(repo.org_id, 
repo.project_id, repo.id,
+                                                                 
parent.pull_request_id)
         for raw_commit in response:
             raw_commit["pull_request_id"] = parent.domain_id()
+            raw_commit["x_request_url"] = response.get_url_with_query_string()
+            raw_commit["x_request_input"] = {
+                "OrgId": repo.org_id,
+                "ProjectId": repo.project_id,
+                "RepoId": repo.id,
+                "PullRequestId": parent.pull_request_id,
+            }
             yield raw_commit, state
 
     def convert(self, commit: GitPullRequestCommit, context) -> 
Iterable[code.PullRequestCommit]:
diff --git 
a/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py 
b/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py
index 3ae9d97c1..f58776d99 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/pull_requests.py
@@ -34,6 +34,12 @@ class GitPullRequests(Stream):
         repo: GitRepository = context.scope
         response = api.git_repo_pull_requests(repo.org_id, repo.project_id, 
repo.id)
         for raw_pr in response:
+            raw_pr["x_request_url"] = response.get_url_with_query_string()
+            raw_pr["x_request_input"] = {
+                "OrgId": repo.org_id,
+                "ProjectId": repo.project_id,
+                "RepoId": repo.id,
+            }
             yield raw_pr, state
 
     def convert(self, pr: GitPullRequest, ctx):
diff --git a/backend/python/pydevlake/pydevlake/api.py 
b/backend/python/pydevlake/pydevlake/api.py
index 89678c2df..10547cb8a 100644
--- a/backend/python/pydevlake/pydevlake/api.py
+++ b/backend/python/pydevlake/pydevlake/api.py
@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 from typing import Optional, Union
+from urllib.parse import urlencode
 from http import HTTPStatus
 import json
 import time
@@ -71,6 +72,12 @@ class Response:
     def __str__(self):
         return f'{self.request}: {self.status}'
 
+    def get_url_with_query_string(self) -> str:
+        url = self.request.url
+        if self.request.query_args is not None:
+            url = f'{url}?{urlencode(self.request.query_args)}'
+        return url
+
 
 # Sentinel value to abort processing of requests/responses in hooks
 ABORT = object()
diff --git a/backend/python/pydevlake/pydevlake/subtasks.py 
b/backend/python/pydevlake/pydevlake/subtasks.py
index 52cadb89b..c1552f5b6 100644
--- a/backend/python/pydevlake/pydevlake/subtasks.py
+++ b/backend/python/pydevlake/pydevlake/subtasks.py
@@ -14,19 +14,18 @@
 # limitations under the License.
 
 
-from abc import abstractmethod
 import json
+from abc import abstractmethod
 from datetime import datetime
 from typing import Tuple, Dict, Iterable, Generator
 
-
 import sqlalchemy.sql as sql
 from sqlmodel import Session, select
 
-from pydevlake.model import RawModel, ToolModel, DomainModel, SubtaskRun, 
raw_data_params
+from pydevlake import logger
 from pydevlake.context import Context
 from pydevlake.message import RemoteProgress
-from pydevlake import logger
+from pydevlake.model import RawModel, ToolModel, DomainModel, SubtaskRun, 
raw_data_params
 
 
 class Subtask:
@@ -74,7 +73,7 @@ class Subtask:
                 # Send final progress
                 if progress != last_progress:
                     yield RemoteProgress(
-                        increment=progress-last_progress,
+                        increment=progress - last_progress,
                         current=progress
                     )
             except Exception as e:
@@ -144,9 +143,18 @@ class Collector(Subtask):
 
     def process(self, data: object, session: Session, ctx: Context):
         raw_model_class = self.stream.raw_model(session)
+        url, input_info = "", ""
+        if "x_request_url" in data:
+            url = data["x_request_url"]
+            del data["x_request_url"]
+        if "x_request_input" in data:
+            input_info = data["x_request_input"]
+            del data["x_request_input"]
         raw_model = raw_model_class(
             params=self._params(ctx),
-            data=json.dumps(data).encode('utf8')
+            data=json.dumps(data).encode('utf8'),
+            url=url,
+            input=json.dumps(input_info).encode('utf8'),
         )
         session.add(raw_model)
 
@@ -186,6 +194,7 @@ class Extractor(Subtask):
         model = self.stream.tool_model
         session.execute(sql.delete(model).where(model.raw_data_params == 
self._params(ctx)))
 
+
 class Convertor(Subtask):
     @property
     def verb(self):

Reply via email to