This is an automated email from the ASF dual-hosted git repository.

warren pushed a commit to branch fix-8669
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git

commit 77a71998b18d6f01c903607d05f7435486bb754c
Author: warren <[email protected]>
AuthorDate: Wed Dec 31 18:54:13 2025 +0800

    feat(azuredevops): add environment_pattern for extracting environment names from job/stage names
    
    This enhancement addresses the issue where the Azure DevOps plugin was unable to
    correctly detect production deployments when the environment name is embedded in
    job/stage names rather than being in a standard format.
    
    Changes:
    - Add `environment_pattern` scope config field that supports regex capture groups
      to extract environment names from job/stage names
    - Collect both Job and Stage records from the timeline API (previously only Job)
    - Add `identifier`, `type`, and `parent_id` fields to the Job model
    - Update environment detection logic:
      - If environment_pattern is configured, extract the environment name first
      - Apply production_pattern to the extracted environment name
      - Fall back to matching production_pattern against the job name if no environment name was extracted
    - Fix default environment behavior: when production_pattern is not configured,
      default to PRODUCTION only for deployments (previously every build and job
      defaulted to PRODUCTION in that case)
    - Add comprehensive tests for the new functionality
    
    Example configuration for pipelines with jobs like 'deploy_xxxx-prod_helm':
    - deployment_pattern: deploy
    - production_pattern: prod
    - environment_pattern: (?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$
    
    This extracts 'xxxx-prod' from the job name and then applies production_pattern
    to correctly identify it as a production deployment.
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-Authored-By: Claude Opus 4.5 <[email protected]>
---
 .../plugins/azuredevops/azuredevops/models.py      |   6 +
 .../azuredevops/azuredevops/streams/builds.py      |  14 +-
 .../azuredevops/azuredevops/streams/jobs.py        |  54 +++++-
 .../plugins/azuredevops/tests/streams_test.py      | 186 +++++++++++++++++++++
 4 files changed, 250 insertions(+), 10 deletions(-)

diff --git a/backend/python/plugins/azuredevops/azuredevops/models.py 
b/backend/python/plugins/azuredevops/azuredevops/models.py
index 27c112429..73f4d0764 100644
--- a/backend/python/plugins/azuredevops/azuredevops/models.py
+++ b/backend/python/plugins/azuredevops/azuredevops/models.py
@@ -37,6 +37,9 @@ class GitRepositoryConfig(ScopeConfig):
     refdiff: Optional[RefDiffOptions]
     deployment_pattern: Optional[re.Pattern]
     production_pattern: Optional[re.Pattern]
+    # Optional pattern with capture group to extract environment name from job/stage names
+    # Example: r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$' extracts 'xxxx-prod' from 'deploy_xxxx-prod_helm'
+    environment_pattern: Optional[re.Pattern]
 
 
 class GitRepository(ToolScope, table=True):
@@ -146,3 +149,6 @@ class Job(ToolModel, table=True):
     finish_time: Optional[datetime.datetime]
     state: JobState
     result: Optional[JobResult]
+    identifier: Optional[str]
+    type: Optional[str]
+    parent_id: Optional[str] = Field(source='/parentId')
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py 
b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
index 5230bffa3..8bf91d797 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
@@ -72,10 +72,16 @@ class Builds(Stream):
         if ctx.scope_config.deployment_pattern and 
ctx.scope_config.deployment_pattern.search(b.name):
             type = devops.CICDType.DEPLOYMENT
 
-        environment = devops.CICDEnvironment.PRODUCTION
-        if ctx.scope_config.production_pattern is not None and 
ctx.scope_config.production_pattern.search(
-                b.name) is None:
-            environment = None
+        # Determine if this is a production environment
+        # Match production_pattern against pipeline name
+        environment = None
+        if ctx.scope_config.production_pattern is not None:
+            if ctx.scope_config.production_pattern.search(b.name):
+                environment = devops.CICDEnvironment.PRODUCTION
+        else:
+            # No production_pattern configured - default to PRODUCTION for 
deployments
+            if type == devops.CICDType.DEPLOYMENT:
+                environment = devops.CICDEnvironment.PRODUCTION
 
         if b.finish_time:
             duration_sec = abs(b.finish_time.timestamp() - 
b.start_time.timestamp())
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py 
b/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
index 7d2cd99fe..416b4696b 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 from http import HTTPStatus
-from typing import Iterable
+from typing import Iterable, Optional
 
 import pydevlake.domain_layer.devops as devops
 from azuredevops.api import AzureDevOpsAPI
@@ -24,6 +24,31 @@ from pydevlake import Context, Substream, DomainType
 from pydevlake.api import APIException
 
 
+def extract_environment_name(name: str, identifier: Optional[str], context: Context) -> Optional[str]:
+    """
+    Extract environment name from job/stage name or identifier using environment_pattern.
+
+    The environment_pattern should contain a capture group to extract the environment name.
+    For example: r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$'
+    This would extract 'xxxx-prod' from 'deploy_xxxx-prod_helm'
+    """
+    if not context.scope_config.environment_pattern:
+        return None
+
+    # Try to match against the name first
+    match = context.scope_config.environment_pattern.search(name)
+    if match and match.groups():
+        return match.group(1)
+
+    # If no match on name and identifier is available, try identifier
+    if identifier:
+        match = context.scope_config.environment_pattern.search(identifier)
+        if match and match.groups():
+            return match.group(1)
+
+    return None
+
+
 class Jobs(Substream):
     tool_model = Job
     domain_types = [DomainType.CICD]
@@ -48,7 +73,8 @@ class Jobs(Substream):
         if response.status == HTTPStatus.NO_CONTENT:
             return
         for raw_job in response.json["records"]:
-            if raw_job["type"] == "Job":
+            # Collect both Job and Stage records to support environment 
detection from stages
+            if raw_job["type"] in ("Job", "Stage"):
                 raw_job["build_id"] = parent.domain_id()
                 raw_job["x_request_url"] = response.get_url_with_query_string()
                 raw_job["x_request_input"] = {
@@ -87,10 +113,26 @@ class Jobs(Substream):
         type = devops.CICDType.BUILD
         if ctx.scope_config.deployment_pattern and 
ctx.scope_config.deployment_pattern.search(j.name):
             type = devops.CICDType.DEPLOYMENT
-        environment = devops.CICDEnvironment.PRODUCTION
-        if ctx.scope_config.production_pattern is not None and 
ctx.scope_config.production_pattern.search(
-                j.name) is None:
-            environment = None
+
+        # Extract environment name using the new environment_pattern if 
configured
+        extracted_env_name = extract_environment_name(j.name, j.identifier, ctx)
+
+        # Determine if this is a production environment
+        # Priority: 1) Use extracted environment name with production_pattern
+        #           2) Fall back to matching production_pattern against job 
name
+        environment = None
+        if ctx.scope_config.production_pattern is not None:
+            # If we extracted an environment name, use it for production 
matching
+            if extracted_env_name:
+                if ctx.scope_config.production_pattern.search(extracted_env_name):
+                    environment = devops.CICDEnvironment.PRODUCTION
+            # Fall back to matching against job name
+            elif ctx.scope_config.production_pattern.search(j.name):
+                environment = devops.CICDEnvironment.PRODUCTION
+        else:
+            # No production_pattern configured - default to PRODUCTION for 
deployments
+            if type == devops.CICDType.DEPLOYMENT:
+                environment = devops.CICDEnvironment.PRODUCTION
 
         if j.finish_time:
             duration_sec = abs(j.finish_time.timestamp() - 
j.start_time.timestamp())
diff --git a/backend/python/plugins/azuredevops/tests/streams_test.py 
b/backend/python/plugins/azuredevops/tests/streams_test.py
index 7e3905738..a5a65d9a5 100644
--- a/backend/python/plugins/azuredevops/tests/streams_test.py
+++ b/backend/python/plugins/azuredevops/tests/streams_test.py
@@ -360,3 +360,189 @@ def test_pull_request_commits_stream():
     )
 
     assert_stream_convert(AzureDevOpsPlugin, 'gitpullrequestcommits', raw, 
expected)
+
+
+@pytest.fixture
+def context_with_environment_pattern():
+    """Context with environment_pattern configured to extract environment 
names from job names."""
+    return (
+        ContextBuilder(AzureDevOpsPlugin())
+        .with_connection(token='token')
+        .with_scope_config(
+            deployment_pattern='deploy',
+            production_pattern='prod',
+            # Pattern to extract environment name from job names like 
'deploy_xxxx-prod_helm'
+            environment_pattern=r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$'
+        )
+        .with_scope('johndoe/test-repo', 
url='https://github.com/johndoe/test-repo')
+        .build()
+    )
+
+
+def test_jobs_stream_with_environment_pattern(context_with_environment_pattern):
+    """Test that environment_pattern extracts environment name and uses it for 
production matching."""
+    raw = {
+        'previousAttempts': [],
+        'id': 'cfa20e98-6997-523c-4233-f0a7302c929f',
+        'parentId': '9ecf18fe-987d-5811-7c63-300aecae35da',
+        'type': 'Job',
+        'name': 'deploy_xxxx-prod_helm',  # environment name 'xxxx-prod' 
should be extracted
+        'build_id': 'azuredevops:Build:1:12',
+        'start_time': '2023-02-25T06:22:36.8066667Z',
+        'finish_time': '2023-02-25T06:22:43.2333333Z',
+        'currentOperation': None,
+        'percentComplete': None,
+        'state': 'completed',
+        'result': 'succeeded',
+        'resultCode': None,
+        'changeId': 18,
+        'lastModified': '0001-01-01T00:00:00',
+        'workerName': 'Hosted Agent',
+        'queueId': 9,
+        'order': 1,
+        'details': None,
+        'errorCount': 0,
+        'warningCount': 0,
+        'url': None,
+        'log': {
+            'id': 10,
+            'type': 'Container',
+            'url': 
'https://dev.azure.com/johndoe/7a3fd40e-2aed-4fac-bac9-511bf1a70206/_apis/build/builds/12/logs/10'
+        },
+        'task': None,
+        'attempt': 1,
+        'identifier': 'deploy_xxxx-prod_helm.__default'
+    }
+
+    expected = devops.CICDTask(
+        id='cfa20e98-6997-523c-4233-f0a7302c929f',
+        name='deploy_xxxx-prod_helm',
+        pipeline_id='azuredevops:Build:1:12',
+        status=devops.CICDStatus.DONE,
+        original_status='Completed',
+        original_result='Succeeded',
+        created_date='2023-02-25T06:22:36.8066667Z',
+        started_date='2023-02-25T06:22:36.8066667Z',
+        finished_date='2023-02-25T06:22:43.2333333Z',
+        result=devops.CICDResult.SUCCESS,
+        type=devops.CICDType.DEPLOYMENT,
+        duration_sec=6.426667213439941,
+        environment=devops.CICDEnvironment.PRODUCTION,  # Should match because 
'xxxx-prod' contains 'prod'
+        cicd_scope_id=context_with_environment_pattern.scope.domain_id()
+    )
+    assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected, context_with_environment_pattern)
+
+
+def test_jobs_stream_with_environment_pattern_non_prod(context_with_environment_pattern):
+    """Test that non-prod environments are correctly identified."""
+    raw = {
+        'previousAttempts': [],
+        'id': 'cfa20e98-6997-523c-4233-f0a7302c929f',
+        'parentId': '9ecf18fe-987d-5811-7c63-300aecae35da',
+        'type': 'Job',
+        'name': 'deploy_xxxx-dev_helm',  # environment name 'xxxx-dev' should 
be extracted, not prod
+        'build_id': 'azuredevops:Build:1:12',
+        'start_time': '2023-02-25T06:22:36.8066667Z',
+        'finish_time': '2023-02-25T06:22:43.2333333Z',
+        'currentOperation': None,
+        'percentComplete': None,
+        'state': 'completed',
+        'result': 'succeeded',
+        'resultCode': None,
+        'changeId': 18,
+        'lastModified': '0001-01-01T00:00:00',
+        'workerName': 'Hosted Agent',
+        'queueId': 9,
+        'order': 1,
+        'details': None,
+        'errorCount': 0,
+        'warningCount': 0,
+        'url': None,
+        'log': {
+            'id': 10,
+            'type': 'Container',
+            'url': 
'https://dev.azure.com/johndoe/7a3fd40e-2aed-4fac-bac9-511bf1a70206/_apis/build/builds/12/logs/10'
+        },
+        'task': None,
+        'attempt': 1,
+        'identifier': 'deploy_xxxx-dev_helm.__default'
+    }
+
+    expected = devops.CICDTask(
+        id='cfa20e98-6997-523c-4233-f0a7302c929f',
+        name='deploy_xxxx-dev_helm',
+        pipeline_id='azuredevops:Build:1:12',
+        status=devops.CICDStatus.DONE,
+        original_status='Completed',
+        original_result='Succeeded',
+        created_date='2023-02-25T06:22:36.8066667Z',
+        started_date='2023-02-25T06:22:36.8066667Z',
+        finished_date='2023-02-25T06:22:43.2333333Z',
+        result=devops.CICDResult.SUCCESS,
+        type=devops.CICDType.DEPLOYMENT,
+        duration_sec=6.426667213439941,
+        environment=None,  # Should be None because 'xxxx-dev' does not 
contain 'prod'
+        cicd_scope_id=context_with_environment_pattern.scope.domain_id()
+    )
+    assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected, context_with_environment_pattern)
+
+
+def test_stage_record_collected():
+    """Test that Stage records are also collected (not just Job records)."""
+    context = (
+        ContextBuilder(AzureDevOpsPlugin())
+        .with_connection(token='token')
+        .with_scope_config(
+            deployment_pattern='deploy',
+            production_pattern='prod'
+        )
+        .with_scope('johndoe/test-repo', 
url='https://github.com/johndoe/test-repo')
+        .build()
+    )
+
+    raw = {
+        'previousAttempts': [],
+        'id': 'stage-id-123',
+        'parentId': None,
+        'type': 'Stage',  # This is a Stage record
+        'name': 'deploy_prod_stage',
+        'build_id': 'azuredevops:Build:1:12',
+        'start_time': '2023-02-25T06:22:36.8066667Z',
+        'finish_time': '2023-02-25T06:22:43.2333333Z',
+        'currentOperation': None,
+        'percentComplete': None,
+        'state': 'completed',
+        'result': 'succeeded',
+        'resultCode': None,
+        'changeId': 18,
+        'lastModified': '0001-01-01T00:00:00',
+        'workerName': None,
+        'queueId': None,
+        'order': 1,
+        'details': None,
+        'errorCount': 0,
+        'warningCount': 0,
+        'url': None,
+        'log': None,
+        'task': None,
+        'attempt': 1,
+        'identifier': 'deploy_prod_stage'
+    }
+
+    expected = devops.CICDTask(
+        id='stage-id-123',
+        name='deploy_prod_stage',
+        pipeline_id='azuredevops:Build:1:12',
+        status=devops.CICDStatus.DONE,
+        original_status='Completed',
+        original_result='Succeeded',
+        created_date='2023-02-25T06:22:36.8066667Z',
+        started_date='2023-02-25T06:22:36.8066667Z',
+        finished_date='2023-02-25T06:22:43.2333333Z',
+        result=devops.CICDResult.SUCCESS,
+        type=devops.CICDType.DEPLOYMENT,
+        duration_sec=6.426667213439941,
+        environment=devops.CICDEnvironment.PRODUCTION,
+        cicd_scope_id=context.scope.domain_id()
+    )
+    assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected, context)

Reply via email to