This is an automated email from the ASF dual-hosted git repository.
klesh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new caa5a6ac7 fix(linker): scope clearHistoryData to current project only
(#8814) (#8815)
caa5a6ac7 is described below
commit caa5a6ac752a03afe3dedb42e712448aa3997c4c
Author: Daniele M. <[email protected]>
AuthorDate: Wed Apr 1 12:22:21 2026 +0100
fix(linker): scope clearHistoryData to current project only (#8814) (#8815)
The clearHistoryData() function used a LEFT JOIN with project_name
in the ON clause. Because a LEFT JOIN keeps every pull_requests row
even when the ON condition does not match, the subquery returned all
PR IDs regardless of project. This effectively wiped the entire
pull_request_issues table on every linker run, deleting links from
other projects sharing the same repos and links created by the GitHub
converter.
Fix:
- Use INNER JOIN + WHERE for proper project scoping
- Add issue-side subquery scoped to current project's boards
- Filter on _raw_data_table (empty or NULL) and _raw_data_remark
(containing 'pull_requests,') so that only linker-created rows are
deleted
Add e2e test for cross-project shared repo scenario.
---
.../plugins/linker/e2e/link_pr_and_issue_test.go | 42 ++++++++++++++++++++++
.../snapshot_tables/cross_project_board_issues.csv | 4 +++
.../e2e/snapshot_tables/cross_project_issues.csv | 4 +++
.../cross_project_project_mapping.csv | 5 +++
.../cross_project_pull_request_issues_after.csv | 5 +++
.../cross_project_pull_request_issues_before.csv | 4 +++
.../cross_project_pull_requests.csv | 3 ++
backend/plugins/linker/tasks/link_pr_and_issue.go | 18 +++++++---
8 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/backend/plugins/linker/e2e/link_pr_and_issue_test.go
b/backend/plugins/linker/e2e/link_pr_and_issue_test.go
index dbec92a60..15a0e4767 100644
--- a/backend/plugins/linker/e2e/link_pr_and_issue_test.go
+++ b/backend/plugins/linker/e2e/link_pr_and_issue_test.go
@@ -29,6 +29,48 @@ import (
"github.com/apache/incubator-devlake/plugins/linker/tasks"
)
+func TestLinkPrToIssueWithSharedRepo(t *testing.T) {
+ var plugin impl.Linker
+ dataflowTester := e2ehelper.NewDataFlowTester(t, "linker", plugin)
+
+ regexpStr := "#(\\d+)"
+ re, err := regexp.Compile(regexpStr)
+ if err != nil {
+ panic(err)
+ }
+ taskData := &tasks.LinkerTaskData{
+ Options: &tasks.LinkerOptions{
+ PrToIssueRegexp: regexpStr,
+ ProjectName: "GitHub1",
+ },
+ PrToIssueRegexp: re,
+ }
+
+ // Import input tables for two projects sharing repo R1
+
dataflowTester.ImportCsvIntoTabler("./snapshot_tables/cross_project_issues.csv",
&ticket.Issue{})
+
dataflowTester.ImportCsvIntoTabler("./snapshot_tables/cross_project_pull_requests.csv",
&code.PullRequest{})
+
dataflowTester.ImportCsvIntoTabler("./snapshot_tables/cross_project_project_mapping.csv",
&crossdomain.ProjectMapping{})
+
dataflowTester.ImportCsvIntoTabler("./snapshot_tables/cross_project_board_issues.csv",
&ticket.BoardIssue{})
+
+ // Pre-populate pull_request_issues with data from GitHub2's linker and
from GitHub converter.
+ // These rows must survive when we run the linker for GitHub1.
+
dataflowTester.ImportCsvIntoTabler("./snapshot_tables/cross_project_pull_request_issues_before.csv",
&crossdomain.PullRequestIssue{})
+
+ dataflowTester.Subtask(tasks.LinkPrToIssueMeta, taskData)
+
+ // Verify: GitHub1's links are created, GitHub2's linker link and
converter link survive
+ dataflowTester.VerifyTable(
+ crossdomain.PullRequestIssue{},
+ "./snapshot_tables/cross_project_pull_request_issues_after.csv",
+ e2ehelper.ColumnWithRawData(
+ "pull_request_id",
+ "pull_request_key",
+ "issue_id",
+ "issue_key",
+ ),
+ )
+}
+
func TestLinkPrToIssue(t *testing.T) {
var plugin impl.Linker
dataflowTester := e2ehelper.NewDataFlowTester(t, "linker", plugin)
diff --git
a/backend/plugins/linker/e2e/snapshot_tables/cross_project_board_issues.csv
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_board_issues.csv
new file mode 100644
index 000000000..8b61a7a95
--- /dev/null
+++ b/backend/plugins/linker/e2e/snapshot_tables/cross_project_board_issues.csv
@@ -0,0 +1,4 @@
+"board_id","issue_id","created_at","updated_at","_raw_data_params","_raw_data_table","_raw_data_id","_raw_data_remark"
+"github:GithubRepo:1:384111310","github:GithubIssue:1:1237324696","2024-05-14
10:42:37.541","2024-05-28
00:25:41.436","{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}","_raw_github_graphql_issues",69,""
+"github:GithubRepo:1:384111310","github:GithubIssue:1:1237324697","2024-05-14
10:42:37.541","2024-05-28
00:25:41.436","{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}","_raw_github_graphql_issues",69,""
+"github:GithubRepo:2:999999999","github:GithubIssue:2:2001","2024-05-14
10:42:37.541","2024-05-28
00:25:41.436","{""ConnectionId"":2,""Name"":""other/repo""}","_raw_github_graphql_issues",70,""
diff --git
a/backend/plugins/linker/e2e/snapshot_tables/cross_project_issues.csv
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_issues.csv
new file mode 100644
index 000000000..2805a0d3a
--- /dev/null
+++ b/backend/plugins/linker/e2e/snapshot_tables/cross_project_issues.csv
@@ -0,0 +1,4 @@
+"id","created_at","updated_at","_raw_data_params","_raw_data_table","_raw_data_id","_raw_data_remark","url","icon_url","issue_key","title","description","epic_key","type","original_type","status","original_status","resolution_date","created_date","updated_date","lead_time_minutes","parent_issue_id","priority","story_point","original_estimate_minutes","time_spent_minutes","time_remaining_minutes","creator_id","creator_name","assignee_id","assignee_name","severity","component","original_pr
[...]
+"github:GithubIssue:1:1237324696","2024-05-14 10:42:37.529","2024-05-15
12:07:36.450","{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}","_raw_github_graphql_issues",59,"","https://github.com/apache/incubator-devlake/issues/1884","","1884","Issue
1884","desc","","","type/feature-request","TODO","OPEN","2032-05-16
15:23:21.000","2022-05-16 15:23:21.000","2024-05-11
00:17:21.000",10,"","",11,1,12,11,"github:GithubAccount:1:14050754","Startrekzky","","","","","",""
+"github:GithubIssue:1:1237324697","2024-05-14 10:42:37.529","2024-05-15
12:07:36.450","{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}","_raw_github_graphql_issues",59,"","https://github.com/apache/incubator-devlake/issues/1885","","1885","Issue
1885","desc","","","type/feature-request","TODO","OPEN","2032-05-16
15:23:21.000","2022-05-16 15:23:21.000","2024-05-11
00:17:21.000",10,"","",11,1,12,11,"github:GithubAccount:1:14050754","Startrekzky","","","","","",""
+"github:GithubIssue:2:2001","2024-05-14 10:42:37.529","2024-05-15
12:07:36.450","{""ConnectionId"":2,""Name"":""other/repo""}","_raw_github_graphql_issues",60,"","https://github.com/other/repo/issues/2001","","2001","Issue
2001","desc","","","type/bug","TODO","OPEN","2032-05-16
15:23:21.000","2022-05-16 15:23:21.000","2024-05-11
00:17:21.000",10,"","",11,1,12,11,"github:GithubAccount:1:14050754","Startrekzky","","","","","",""
diff --git
a/backend/plugins/linker/e2e/snapshot_tables/cross_project_project_mapping.csv
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_project_mapping.csv
new file mode 100644
index 000000000..2fc407726
--- /dev/null
+++
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_project_mapping.csv
@@ -0,0 +1,5 @@
+"project_name","table","row_id","created_at","updated_at","_raw_data_params","_raw_data_table","_raw_data_id","_raw_data_remark"
+"GitHub1","repos","github:GithubRepo:1:384111310","2024-05-15
12:02:13.590","2024-05-15 12:02:13.590","GitHub1","",0,""
+"GitHub1","boards","github:GithubRepo:1:384111310","2024-05-15
12:02:13.590","2024-05-15 12:02:13.590","GitHub1","",0,""
+"GitHub2","repos","github:GithubRepo:1:384111310","2024-05-15
12:02:13.590","2024-05-15 12:02:13.590","GitHub2","",0,""
+"GitHub2","boards","github:GithubRepo:2:999999999","2024-05-15
12:02:13.590","2024-05-15 12:02:13.590","GitHub2","",0,""
diff --git
a/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_request_issues_after.csv
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_request_issues_after.csv
new file mode 100644
index 000000000..87ff1971b
--- /dev/null
+++
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_request_issues_after.csv
@@ -0,0 +1,5 @@
+pull_request_id,issue_id,pull_request_key,issue_key,_raw_data_params,_raw_data_table,_raw_data_id,_raw_data_remark
+github:GithubPullRequest:1:1819250573,github:GithubIssue:1:1237324696,7317,1884,,,0,"pull_requests,"
+github:GithubPullRequest:1:1819250573,github:GithubIssue:1:1237324697,7317,1885,,,0,"pull_requests,"
+github:GithubPullRequest:1:1819250574,github:GithubIssue:2:2001,7318,2001,,,0,"pull_requests,"
+github:GithubPullRequest:1:1819250574,github:GithubIssue:1:1237324696,7318,1884,"{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}",_raw_github_api_pull_requests,191,""
diff --git
a/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_request_issues_before.csv
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_request_issues_before.csv
new file mode 100644
index 000000000..217266f3b
--- /dev/null
+++
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_request_issues_before.csv
@@ -0,0 +1,4 @@
+pull_request_id,issue_id,pull_request_key,issue_key,_raw_data_params,_raw_data_table,_raw_data_id,_raw_data_remark
+github:GithubPullRequest:1:1819250574,github:GithubIssue:1:1237324697,7318,1885,,,0,"pull_requests,"
+github:GithubPullRequest:1:1819250574,github:GithubIssue:2:2001,7318,2001,,,0,"pull_requests,"
+github:GithubPullRequest:1:1819250574,github:GithubIssue:1:1237324696,7318,1884,"{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}",_raw_github_api_pull_requests,191,""
diff --git
a/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_requests.csv
b/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_requests.csv
new file mode 100644
index 000000000..8c6907f0f
--- /dev/null
+++ b/backend/plugins/linker/e2e/snapshot_tables/cross_project_pull_requests.csv
@@ -0,0 +1,3 @@
+"id","created_at","updated_at","_raw_data_params","_raw_data_table","_raw_data_id","_raw_data_remark","base_repo_id","base_ref","base_commit_sha","head_repo_id","head_ref","head_commit_sha","merge_commit_sha","status","original_status","type","component","title","description","url","author_name","author_id","parent_pr_id","pull_request_key","created_date","merged_date","closed_date"
+"github:GithubPullRequest:1:1819250573","2024-05-15 12:07:36.778","2024-05-15
12:07:36.778","{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}","_raw_github_api_pull_requests",191,"","github:GithubRepo:1:384111310","main","64c52748f3529784cb6c8a372691aa0f638fa73d","github:GithubRepo:1:384111310","fix#7275","14fb6488f2208e6a65374a86efce12dd460987e0","91dbce48759da14a4a030124c3ef751f1c5d8389","CLOSED","closed","","","fix:
can't GET projects which have / in their name #1884 #1885", [...]
+"github:GithubPullRequest:1:1819250574","2024-05-15 12:07:36.778","2024-05-15
12:07:36.778","{""ConnectionId"":1,""Name"":""apache/incubator-devlake""}","_raw_github_api_pull_requests",192,"","github:GithubRepo:1:384111310","main","64c52748f3529784cb6c8a372691aa0f638fa73d","github:GithubRepo:1:384111310","fix#2001","14fb6488f2208e6a65374a86efce12dd460987e0","","MERGED","merged","","","fix:
something related to
#2001","desc","https://github.com/apache/incubator-devlake/pull/7318","abeizn"
[...]
diff --git a/backend/plugins/linker/tasks/link_pr_and_issue.go
b/backend/plugins/linker/tasks/link_pr_and_issue.go
index 52fb7aa86..faeeffd71 100644
--- a/backend/plugins/linker/tasks/link_pr_and_issue.go
+++ b/backend/plugins/linker/tasks/link_pr_and_issue.go
@@ -52,13 +52,23 @@ func clearHistoryData(db dal.Dal, data *LinkerTaskData)
errors.Error {
WHERE pull_request_id IN (
SELECT pr.id
FROM pull_requests pr
- LEFT JOIN project_mapping pm
+ INNER JOIN project_mapping pm
ON pm.table = 'repos'
AND pm.row_id = pr.base_repo_id
- AND pm.project_name = ?
- )
+ WHERE pm.project_name = ?
+ )
+ AND issue_id IN (
+ SELECT bi.issue_id
+ FROM board_issues bi
+ INNER JOIN project_mapping pm2
+ ON pm2.table = 'boards'
+ AND pm2.row_id = bi.board_id
+ WHERE pm2.project_name = ?
+ )
+ AND (_raw_data_table = '' OR _raw_data_table IS NULL)
+ AND _raw_data_remark LIKE '%pull_requests,%'
`
- return db.Exec(sql, data.Options.ProjectName)
+ return db.Exec(sql, data.Options.ProjectName, data.Options.ProjectName)
}
func LinkPrToIssue(taskCtx plugin.SubTaskContext) errors.Error {