This is an automated email from the ASF dual-hosted git repository.

klesh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 144c2b7f8 fix(plugin-circleci): populate workflow id for 
unfinished-job collection (#8907) (#8912)
144c2b7f8 is described below

commit 144c2b7f87ad358ebdb5a6accf732541a4a76687
Author: Volodymyr Zahorniak <[email protected]>
AuthorDate: Tue Jun 9 05:33:13 2026 +0300

    fix(plugin-circleci): populate workflow id for unfinished-job collection 
(#8907) (#8912)
    
    * refactor(plugin-circleci): extract unfinished-jobs input clauses into a 
helper
    
    Signed-off-by: Volodymyr Zahorniak <[email protected]>
    
    * fix(plugin-circleci): populate workflow id for unfinished-job collection 
(#8907)
    
    The collectJobs 'unfinished details' collector built its URL from
    '/v2/workflow/{{ .Input.Id }}/job' but its iterator selected 'DISTINCT
    workflow_id' into a models.CircleciJob, leaving .Id empty and producing
    '/v2/workflow//job' (HTTP 500) whenever a job was running/queued/on_hold.
    Alias the projection to 'workflow_id AS id' so .Id carries the workflow id,
    mirroring the new-records collector. Adds an e2e regression test.
    
    Signed-off-by: Volodymyr Zahorniak <[email protected]>
    
    ---------
    
    Signed-off-by: Volodymyr Zahorniak <[email protected]>
    Co-authored-by: Klesh Wong <[email protected]>
---
 backend/plugins/circleci/e2e/job_collector_test.go | 82 ++++++++++++++++++++++
 backend/plugins/circleci/tasks/job_collector.go    | 22 ++++--
 2 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/backend/plugins/circleci/e2e/job_collector_test.go 
b/backend/plugins/circleci/e2e/job_collector_test.go
new file mode 100644
index 000000000..ed2f01421
--- /dev/null
+++ b/backend/plugins/circleci/e2e/job_collector_test.go
@@ -0,0 +1,82 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+       "reflect"
+       "sort"
+       "testing"
+
+       "github.com/apache/incubator-devlake/helpers/e2ehelper"
+       "github.com/apache/incubator-devlake/helpers/pluginhelper/api"
+       "github.com/apache/incubator-devlake/plugins/circleci/impl"
+       "github.com/apache/incubator-devlake/plugins/circleci/models"
+       "github.com/apache/incubator-devlake/plugins/circleci/tasks"
+       "github.com/stretchr/testify/assert"
+)
+
+// TestCircleciUnfinishedJobsInputIterator is a regression test for
+// https://github.com/apache/devlake/issues/8907. The "collect unfinished job
+// details" collector builds its URL from
+// "/v2/workflow/{{ .Input.Id }}/job" while scanning rows into a
+// models.CircleciJob. Its input query must therefore expose the workflow id
+// in the row's Id field; a bare "DISTINCT workflow_id" left Id empty and
+// produced "/v2/workflow//job" (HTTP 500). This test runs the production
+// query (tasks.UnfinishedJobsInputClauses) through the real iterator and
+// asserts that each yielded row's Id is the workflow id, that results are
+// DISTINCT, and that the status/connection filters hold.
+func TestCircleciUnfinishedJobsInputIterator(t *testing.T) {
+       var circleci impl.Circleci
+       dataflowTester := e2ehelper.NewDataFlowTester(t, "circleci", circleci)
+
+       const projectSlug = "github/test/repo"
+       // NOTE(review): FlushTabler presumably resets the CircleciJob table so that
+       // only the rows seeded below are visible to the query - confirm against
+       // e2ehelper.
+       dataflowTester.FlushTabler(&models.CircleciJob{})
+
+       // Fixture: two unfinished jobs sharing one workflow, one unfinished job in a
+       // second workflow, one finished job, and one unfinished job that belongs to
+       // a different connection.
+       seed := []models.CircleciJob{
+               {ConnectionId: 1, WorkflowId: "wf-onhold", Id: "job-1", 
ProjectSlug: projectSlug, Status: "on_hold"},
+               {ConnectionId: 1, WorkflowId: "wf-onhold", Id: "job-2", 
ProjectSlug: projectSlug, Status: "running"}, // same workflow -> DISTINCT
+               {ConnectionId: 1, WorkflowId: "wf-queued", Id: "job-3", 
ProjectSlug: projectSlug, Status: "queued"},
+               {ConnectionId: 1, WorkflowId: "wf-success", Id: "job-4", 
ProjectSlug: projectSlug, Status: "success"},   // terminal -> excluded
+               {ConnectionId: 2, WorkflowId: "wf-otherconn", Id: "job-5", 
ProjectSlug: projectSlug, Status: "on_hold"}, // other connection -> excluded
+       }
+       for i := range seed {
+               assert.Nil(t, dataflowTester.Dal.Create(&seed[i]))
+       }
+
+       // Query for connection 1 and the seeded project slug, using the clauses
+       // exported by the tasks package.
+       cursor, err := 
dataflowTester.Dal.Cursor(tasks.UnfinishedJobsInputClauses(1, projectSlug)...)
+       assert.Nil(t, err)
+       // Rows are scanned into models.CircleciJob, the element type handed to the
+       // iterator here.
+       // NOTE(review): assert.Nil does not stop the test on failure; if err were
+       // non-nil, the uses of iter below would presumably panic - consider
+       // aborting the test at that point.
+       iter, err := api.NewDalCursorIterator(dataflowTester.Dal, cursor, 
reflect.TypeOf(models.CircleciJob{}))
+       assert.Nil(t, err)
+       defer iter.Close()
+
+       // Collect the Id field of every row the iterator yields.
+       var ids []string
+       for iter.HasNext() {
+               item, err := iter.Fetch()
+               assert.Nil(t, err)
+               job := item.(*models.CircleciJob)
+               ids = append(ids, job.Id)
+       }
+       // Sort so the comparison below does not depend on the order in which rows
+       // come back.
+       sort.Strings(ids)
+
+       // Distinct workflow ids for connection 1's non-terminal jobs, with Id 
populated
+       // (the URL template reads .Input.Id). wf-success (terminal) and 
wf-otherconn
+       // (connection 2) are excluded.
+       assert.Equal(t, []string{"wf-onhold", "wf-queued"}, ids)
+       for _, id := range ids {
+               assert.NotEmpty(t, id, "Input.Id must be the workflow id, not 
empty (#8907)")
+       }
+}
diff --git a/backend/plugins/circleci/tasks/job_collector.go 
b/backend/plugins/circleci/tasks/job_collector.go
index fd1d78286..00fd23452 100644
--- a/backend/plugins/circleci/tasks/job_collector.go
+++ b/backend/plugins/circleci/tasks/job_collector.go
@@ -41,6 +41,20 @@ var CollectJobsMeta = plugin.SubTaskMeta{
        DomainTypes:      []string{plugin.DOMAIN_TYPE_CICD},
 }
 
+// UnfinishedJobsInputClauses returns the DAL clauses that select the
+// workflows whose jobs are still in a non-terminal status and therefore need
+// their job details recollected by the CollectJobs "unfinished details"
+// collector.
+func UnfinishedJobsInputClauses(connectionId uint64, projectSlug string) 
[]dal.Clause {
+       return []dal.Clause{
+               // DISTINCT yields one row per workflow, however many of its jobs
+               // match the filter below.
+               dal.Select("DISTINCT workflow_id AS id"), // #8907: alias to id 
so {{ .Input.Id }} resolves when scanned into CircleciJob
+               dal.From(&models.CircleciJob{}),
+               // Restrict to the given connection and project, and to the four
+               // statuses listed in the condition.
+               dal.Where(
+                       "connection_id = ? AND project_slug = ? AND status IN 
('running', 'not_running', 'queued', 'on_hold')",
+                       connectionId, projectSlug,
+               ),
+       }
+}
+
 func CollectJobs(taskCtx plugin.SubTaskContext) errors.Error {
        rawDataSubTaskArgs, data := CreateRawDataSubTaskArgs(taskCtx, 
RAW_JOB_TABLE)
        logger := taskCtx.GetLogger()
@@ -94,14 +108,8 @@ func CollectJobs(taskCtx plugin.SubTaskContext) 
errors.Error {
                                AfterResponse:  ignoreDeletedBuilds,
                        },
                        BuildInputIterator: func() (api.Iterator, errors.Error) 
{
-                               clauses := []dal.Clause{
-                                       dal.Select("DISTINCT workflow_id"), // 
Only need to recollect jobs for a workflow once
-                                       dal.From(&models.CircleciJob{}),
-                                       dal.Where("connection_id = ? AND 
project_slug = ? AND status IN ('running', 'not_running', 'queued', 
'on_hold')", data.Options.ConnectionId, data.Options.ProjectSlug),
-                               }
-
                                db := taskCtx.GetDal()
-                               cursor, err := db.Cursor(clauses...)
+                               cursor, err := 
db.Cursor(UnfinishedJobsInputClauses(data.Options.ConnectionId, 
data.Options.ProjectSlug)...)
                                if err != nil {
                                        return nil, err
                                }

Reply via email to