This is an automated email from the ASF dual-hosted git repository.

warren pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 1420f7a2f fix(jira): optimize collector for remotelink worklog 
changelog (#4245)
1420f7a2f is described below

commit 1420f7a2fa677a8f6c00292cf1f8eba0e78123d0
Author: Warren Chen <[email protected]>
AuthorDate: Sat Jan 21 19:39:16 2023 +0800

    fix(jira): optimize collector for remotelink worklog changelog (#4245)
    
    * fix(jira): optimize collector for remotelink worklog changelog
    
    * fix(jira): fix  for review
    
    * fix(jira): add comment and optimize sql
---
 backend/helpers/pluginhelper/api/api_extractor.go       |  1 -
 backend/plugins/jira/tasks/issue_changelog_collector.go | 12 +++++++++++-
 backend/plugins/jira/tasks/remotelink_collector.go      | 15 ++++++++++-----
 backend/plugins/jira/tasks/worklog_collector.go         | 12 +++++++++++-
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/backend/helpers/pluginhelper/api/api_extractor.go 
b/backend/helpers/pluginhelper/api/api_extractor.go
index 2f37d0de4..2b4f86e5d 100644
--- a/backend/helpers/pluginhelper/api/api_extractor.go
+++ b/backend/helpers/pluginhelper/api/api_extractor.go
@@ -127,7 +127,6 @@ func (extractor *ApiExtractor) Execute() errors.Error {
                        if err != nil {
                                return errors.Default.Wrap(err, "error adding 
result to batch")
                        }
-                       extractor.args.Ctx.IncProgress(1)
                }
                extractor.args.Ctx.IncProgress(1)
        }
diff --git a/backend/plugins/jira/tasks/issue_changelog_collector.go 
b/backend/plugins/jira/tasks/issue_changelog_collector.go
index 46f525373..480d853b1 100644
--- a/backend/plugins/jira/tasks/issue_changelog_collector.go
+++ b/backend/plugins/jira/tasks/issue_changelog_collector.go
@@ -75,7 +75,17 @@ func CollectIssueChangelogs(taskCtx plugin.SubTaskContext) 
errors.Error {
        }
        incremental := collectorWithState.IsIncremental()
        if incremental {
-               clauses = append(clauses, dal.Having("i.updated > 
max(c.issue_updated) OR  (max(c.issue_updated) IS NULL AND 
COUNT(c.changelog_id) > 0)"))
+               clauses = append(clauses, dal.Having("i.updated > ? AND 
(i.updated > max(c.issue_updated) OR (max(c.issue_updated) IS NULL AND 
COUNT(c.changelog_id) > 0))", 
collectorWithState.LatestState.LatestSuccessStart))
+       } else {
+               /*
+                       i.updated > max(c.issue_updated) was deleted because 
for non-incremental collection,
+                       max(c.issue_updated) will only be one of null, less than or 
equal to i.updated
+                       so i.updated > max(c.issue_updated) is always false.
+                       max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) 
> 0 implies the issue has more than 100 changelogs,
+                       because we collected changelogs when collecting issues, 
and assign changelog.issue_updated if num of changelogs < 100,
+                       and max(c.issue_updated) IS NULL AND 
COUNT(c.changelog_id) > 0 means all changelogs for the issue were not assigned 
issue_updated
+               */
+               clauses = append(clauses, dal.Having("max(c.issue_updated) IS 
NULL AND COUNT(c.changelog_id) > 0"))
        }
 
        if logger.IsLevelEnabled(log.LOG_DEBUG) {
diff --git a/backend/plugins/jira/tasks/remotelink_collector.go 
b/backend/plugins/jira/tasks/remotelink_collector.go
index 52fc9ee99..3ebed4c39 100644
--- a/backend/plugins/jira/tasks/remotelink_collector.go
+++ b/backend/plugins/jira/tasks/remotelink_collector.go
@@ -68,12 +68,17 @@ func CollectRemotelinks(taskCtx plugin.SubTaskContext) 
errors.Error {
        }
        incremental := collectorWithState.IsIncremental()
        if incremental {
-               if collectorWithState.LatestState.LatestSuccessStart != nil {
-                       clauses = append(clauses, dal.Having("i.updated > ? AND 
(i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL)", 
collectorWithState.LatestState.LatestSuccessStart))
-               } else {
-                       clauses = append(clauses, dal.Having("i.updated > 
max(rl.issue_updated) OR max(rl.issue_updated) IS NULL"))
-               }
+               clauses = append(clauses, dal.Having("i.updated > ? AND 
(i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL)", 
collectorWithState.LatestState.LatestSuccessStart))
        }
+       /*
+               i.updated > max(rl.issue_updated) was deleted because for 
non-incremental collection, max(rl.issue_updated) is always null.
+                       so i.updated > max(rl.issue_updated) is constantly false
+               also, for the first collection, max(rl.issue_updated) is always 
null as there is no data in _tool_jira_remotelinks.
+               In conclusion, we don't need the following clause
+       */
+       //else {
+       // clauses = append(clauses, dal.Having("i.updated > 
max(rl.issue_updated) OR max(rl.issue_updated) IS NULL "))
+       //}
        cursor, err := db.Cursor(clauses...)
        if err != nil {
                logger.Error(err, "collect remotelink error")
diff --git a/backend/plugins/jira/tasks/worklog_collector.go 
b/backend/plugins/jira/tasks/worklog_collector.go
index 517b88aea..2d9d0c85d 100644
--- a/backend/plugins/jira/tasks/worklog_collector.go
+++ b/backend/plugins/jira/tasks/worklog_collector.go
@@ -67,7 +67,17 @@ func CollectWorklogs(taskCtx plugin.SubTaskContext) 
errors.Error {
        }
        incremental := collectorWithState.IsIncremental()
        if incremental {
-               clauses = append(clauses, dal.Having("i.updated > 
max(wl.issue_updated) OR  (max(wl.issue_updated) IS NULL AND 
COUNT(wl.worklog_id) > 0)"))
+               clauses = append(clauses, dal.Having("i.updated > ? AND 
(i.updated > max(wl.issue_updated) OR (max(wl.issue_updated) IS NULL AND 
COUNT(wl.worklog_id) > 0))", collectorWithState.LatestState.LatestSuccessStart))
+       } else {
+               /*
+                       i.updated > max(wl.issue_updated) was deleted because 
for non-incremental collection,
+                       max(wl.issue_updated) will only be one of null, less than or 
equal to i.updated
+                       so i.updated > max(wl.issue_updated) is always false.
+                       max(wl.issue_updated) IS NULL AND COUNT(wl.worklog_id) > 
0 implies the issue has more than 100 worklogs,
+                       because we collected worklogs when collecting issues, 
and assign worklog.issue_updated if num of worklogs < 100,
+                       and max(wl.issue_updated) IS NULL AND 
COUNT(wl.worklog_id) > 0 means all worklogs for the issue were not assigned 
issue_updated
+               */
+               clauses = append(clauses, dal.Having("max(wl.issue_updated) IS 
NULL AND COUNT(wl.worklog_id) > 0"))
        }
 
        // construct the input iterator

Reply via email to