This is an automated email from the ASF dual-hosted git repository.
warren pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new a0f1c988a feat(azuredevops): add environment_pattern for extracting
environment names from job/stage names (#8671)
a0f1c988a is described below
commit a0f1c988ac12770aa58692dfd83de27d28ac758e
Author: Warren Chen <[email protected]>
AuthorDate: Wed Dec 31 19:22:59 2025 +0800
feat(azuredevops): add environment_pattern for extracting environment names
from job/stage names (#8671)
This enhancement addresses an issue where the Azure DevOps plugin was unable
to correctly detect production deployments when the environment name is
embedded in job/stage names rather than appearing in a standard format.
Changes:
- Add an `environment_pattern` scope config field; its first regex capture
  group is used to extract the environment name from job/stage names
- Collect both Job and Stage records from the timeline API (previously only
  Job records were collected)
- Add `identifier`, `type`, and `parent_id` fields to the Job model
- Update environment detection logic:
  - If environment_pattern is configured, extract the environment name first
  - Apply production_pattern to the extracted environment name
  - Fall back to matching production_pattern against the job name if no
    environment name was extracted
- Fix default environment behavior: only default to PRODUCTION when
  production_pattern is not configured (previously it always defaulted to
  PRODUCTION)
- Add comprehensive tests for the new functionality
Example configuration for pipelines with jobs like 'deploy_xxxx-prod_helm':
- deployment_pattern: deploy
- production_pattern: prod
- environment_pattern: (?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$
This extracts 'xxxx-prod' from the job name and then applies
production_pattern to it, correctly identifying the job as a production
deployment.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-authored-by: Claude Opus 4.5 <[email protected]>
---
.../plugins/azuredevops/azuredevops/models.py | 6 +
.../azuredevops/azuredevops/streams/builds.py | 14 +-
.../azuredevops/azuredevops/streams/jobs.py | 54 +++++-
.../plugins/azuredevops/tests/streams_test.py | 186 +++++++++++++++++++++
4 files changed, 250 insertions(+), 10 deletions(-)
diff --git a/backend/python/plugins/azuredevops/azuredevops/models.py
b/backend/python/plugins/azuredevops/azuredevops/models.py
index 27c112429..73f4d0764 100644
--- a/backend/python/plugins/azuredevops/azuredevops/models.py
+++ b/backend/python/plugins/azuredevops/azuredevops/models.py
@@ -37,6 +37,9 @@ class GitRepositoryConfig(ScopeConfig):
refdiff: Optional[RefDiffOptions]
deployment_pattern: Optional[re.Pattern]
production_pattern: Optional[re.Pattern]
+ # Optional pattern with capture group to extract environment name from
job/stage names
+ # Example: r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$'
extracts 'xxxx-prod' from 'deploy_xxxx-prod_helm'
+ environment_pattern: Optional[re.Pattern]
class GitRepository(ToolScope, table=True):
@@ -146,3 +149,6 @@ class Job(ToolModel, table=True):
finish_time: Optional[datetime.datetime]
state: JobState
result: Optional[JobResult]
+ identifier: Optional[str]
+ type: Optional[str]
+ parent_id: Optional[str] = Field(source='/parentId')
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
index 5230bffa3..8bf91d797 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/builds.py
@@ -72,10 +72,16 @@ class Builds(Stream):
if ctx.scope_config.deployment_pattern and
ctx.scope_config.deployment_pattern.search(b.name):
type = devops.CICDType.DEPLOYMENT
- environment = devops.CICDEnvironment.PRODUCTION
- if ctx.scope_config.production_pattern is not None and
ctx.scope_config.production_pattern.search(
- b.name) is None:
- environment = None
+ # Determine if this is a production environment
+ # Match production_pattern against pipeline name
+ environment = None
+ if ctx.scope_config.production_pattern is not None:
+ if ctx.scope_config.production_pattern.search(b.name):
+ environment = devops.CICDEnvironment.PRODUCTION
+ else:
+ # No production_pattern configured - default to PRODUCTION for
deployments
+ if type == devops.CICDType.DEPLOYMENT:
+ environment = devops.CICDEnvironment.PRODUCTION
if b.finish_time:
duration_sec = abs(b.finish_time.timestamp() -
b.start_time.timestamp())
diff --git a/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
b/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
index 7d2cd99fe..416b4696b 100644
--- a/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
+++ b/backend/python/plugins/azuredevops/azuredevops/streams/jobs.py
@@ -14,7 +14,7 @@
# limitations under the License.
from http import HTTPStatus
-from typing import Iterable
+from typing import Iterable, Optional
import pydevlake.domain_layer.devops as devops
from azuredevops.api import AzureDevOpsAPI
@@ -24,6 +24,31 @@ from pydevlake import Context, Substream, DomainType
from pydevlake.api import APIException
+def extract_environment_name(name: str, identifier: Optional[str], context:
Context) -> Optional[str]:
+ """
+ Extract environment name from job/stage name or identifier using
environment_pattern.
+
+ The environment_pattern should contain a capture group to extract the
environment name.
+ For example: r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$'
+ This would extract 'xxxx-prod' from 'deploy_xxxx-prod_helm'
+ """
+ if not context.scope_config.environment_pattern:
+ return None
+
+ # Try to match against the name first
+ match = context.scope_config.environment_pattern.search(name)
+ if match and match.groups():
+ return match.group(1)
+
+ # If no match on name and identifier is available, try identifier
+ if identifier:
+ match = context.scope_config.environment_pattern.search(identifier)
+ if match and match.groups():
+ return match.group(1)
+
+ return None
+
+
class Jobs(Substream):
tool_model = Job
domain_types = [DomainType.CICD]
@@ -48,7 +73,8 @@ class Jobs(Substream):
if response.status == HTTPStatus.NO_CONTENT:
return
for raw_job in response.json["records"]:
- if raw_job["type"] == "Job":
+ # Collect both Job and Stage records to support environment
detection from stages
+ if raw_job["type"] in ("Job", "Stage"):
raw_job["build_id"] = parent.domain_id()
raw_job["x_request_url"] = response.get_url_with_query_string()
raw_job["x_request_input"] = {
@@ -87,10 +113,26 @@ class Jobs(Substream):
type = devops.CICDType.BUILD
if ctx.scope_config.deployment_pattern and
ctx.scope_config.deployment_pattern.search(j.name):
type = devops.CICDType.DEPLOYMENT
- environment = devops.CICDEnvironment.PRODUCTION
- if ctx.scope_config.production_pattern is not None and
ctx.scope_config.production_pattern.search(
- j.name) is None:
- environment = None
+
+ # Extract environment name using the new environment_pattern if
configured
+ extracted_env_name = extract_environment_name(j.name, j.identifier,
ctx)
+
+ # Determine if this is a production environment
+ # Priority: 1) Use extracted environment name with production_pattern
+ # 2) Fall back to matching production_pattern against job
name
+ environment = None
+ if ctx.scope_config.production_pattern is not None:
+ # If we extracted an environment name, use it for production
matching
+ if extracted_env_name:
+ if
ctx.scope_config.production_pattern.search(extracted_env_name):
+ environment = devops.CICDEnvironment.PRODUCTION
+ # Fall back to matching against job name
+ elif ctx.scope_config.production_pattern.search(j.name):
+ environment = devops.CICDEnvironment.PRODUCTION
+ else:
+ # No production_pattern configured - default to PRODUCTION for
deployments
+ if type == devops.CICDType.DEPLOYMENT:
+ environment = devops.CICDEnvironment.PRODUCTION
if j.finish_time:
duration_sec = abs(j.finish_time.timestamp() -
j.start_time.timestamp())
diff --git a/backend/python/plugins/azuredevops/tests/streams_test.py
b/backend/python/plugins/azuredevops/tests/streams_test.py
index 7e3905738..a5a65d9a5 100644
--- a/backend/python/plugins/azuredevops/tests/streams_test.py
+++ b/backend/python/plugins/azuredevops/tests/streams_test.py
@@ -360,3 +360,189 @@ def test_pull_request_commits_stream():
)
assert_stream_convert(AzureDevOpsPlugin, 'gitpullrequestcommits', raw,
expected)
+
+
[email protected]
+def context_with_environment_pattern():
+ """Context with environment_pattern configured to extract environment
names from job names."""
+ return (
+ ContextBuilder(AzureDevOpsPlugin())
+ .with_connection(token='token')
+ .with_scope_config(
+ deployment_pattern='deploy',
+ production_pattern='prod',
+ # Pattern to extract environment name from job names like
'deploy_xxxx-prod_helm'
+
environment_pattern=r'(?:deploy|predeploy)[_-](.+?)(?:[_-](?:helm|terraform))?$'
+ )
+ .with_scope('johndoe/test-repo',
url='https://github.com/johndoe/test-repo')
+ .build()
+ )
+
+
+def
test_jobs_stream_with_environment_pattern(context_with_environment_pattern):
+ """Test that environment_pattern extracts environment name and uses it for
production matching."""
+ raw = {
+ 'previousAttempts': [],
+ 'id': 'cfa20e98-6997-523c-4233-f0a7302c929f',
+ 'parentId': '9ecf18fe-987d-5811-7c63-300aecae35da',
+ 'type': 'Job',
+ 'name': 'deploy_xxxx-prod_helm', # environment name 'xxxx-prod'
should be extracted
+ 'build_id': 'azuredevops:Build:1:12',
+ 'start_time': '2023-02-25T06:22:36.8066667Z',
+ 'finish_time': '2023-02-25T06:22:43.2333333Z',
+ 'currentOperation': None,
+ 'percentComplete': None,
+ 'state': 'completed',
+ 'result': 'succeeded',
+ 'resultCode': None,
+ 'changeId': 18,
+ 'lastModified': '0001-01-01T00:00:00',
+ 'workerName': 'Hosted Agent',
+ 'queueId': 9,
+ 'order': 1,
+ 'details': None,
+ 'errorCount': 0,
+ 'warningCount': 0,
+ 'url': None,
+ 'log': {
+ 'id': 10,
+ 'type': 'Container',
+ 'url':
'https://dev.azure.com/johndoe/7a3fd40e-2aed-4fac-bac9-511bf1a70206/_apis/build/builds/12/logs/10'
+ },
+ 'task': None,
+ 'attempt': 1,
+ 'identifier': 'deploy_xxxx-prod_helm.__default'
+ }
+
+ expected = devops.CICDTask(
+ id='cfa20e98-6997-523c-4233-f0a7302c929f',
+ name='deploy_xxxx-prod_helm',
+ pipeline_id='azuredevops:Build:1:12',
+ status=devops.CICDStatus.DONE,
+ original_status='Completed',
+ original_result='Succeeded',
+ created_date='2023-02-25T06:22:36.8066667Z',
+ started_date='2023-02-25T06:22:36.8066667Z',
+ finished_date='2023-02-25T06:22:43.2333333Z',
+ result=devops.CICDResult.SUCCESS,
+ type=devops.CICDType.DEPLOYMENT,
+ duration_sec=6.426667213439941,
+ environment=devops.CICDEnvironment.PRODUCTION, # Should match because
'xxxx-prod' contains 'prod'
+ cicd_scope_id=context_with_environment_pattern.scope.domain_id()
+ )
+ assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected,
context_with_environment_pattern)
+
+
+def
test_jobs_stream_with_environment_pattern_non_prod(context_with_environment_pattern):
+ """Test that non-prod environments are correctly identified."""
+ raw = {
+ 'previousAttempts': [],
+ 'id': 'cfa20e98-6997-523c-4233-f0a7302c929f',
+ 'parentId': '9ecf18fe-987d-5811-7c63-300aecae35da',
+ 'type': 'Job',
+ 'name': 'deploy_xxxx-dev_helm', # environment name 'xxxx-dev' should
be extracted, not prod
+ 'build_id': 'azuredevops:Build:1:12',
+ 'start_time': '2023-02-25T06:22:36.8066667Z',
+ 'finish_time': '2023-02-25T06:22:43.2333333Z',
+ 'currentOperation': None,
+ 'percentComplete': None,
+ 'state': 'completed',
+ 'result': 'succeeded',
+ 'resultCode': None,
+ 'changeId': 18,
+ 'lastModified': '0001-01-01T00:00:00',
+ 'workerName': 'Hosted Agent',
+ 'queueId': 9,
+ 'order': 1,
+ 'details': None,
+ 'errorCount': 0,
+ 'warningCount': 0,
+ 'url': None,
+ 'log': {
+ 'id': 10,
+ 'type': 'Container',
+ 'url':
'https://dev.azure.com/johndoe/7a3fd40e-2aed-4fac-bac9-511bf1a70206/_apis/build/builds/12/logs/10'
+ },
+ 'task': None,
+ 'attempt': 1,
+ 'identifier': 'deploy_xxxx-dev_helm.__default'
+ }
+
+ expected = devops.CICDTask(
+ id='cfa20e98-6997-523c-4233-f0a7302c929f',
+ name='deploy_xxxx-dev_helm',
+ pipeline_id='azuredevops:Build:1:12',
+ status=devops.CICDStatus.DONE,
+ original_status='Completed',
+ original_result='Succeeded',
+ created_date='2023-02-25T06:22:36.8066667Z',
+ started_date='2023-02-25T06:22:36.8066667Z',
+ finished_date='2023-02-25T06:22:43.2333333Z',
+ result=devops.CICDResult.SUCCESS,
+ type=devops.CICDType.DEPLOYMENT,
+ duration_sec=6.426667213439941,
+ environment=None, # Should be None because 'xxxx-dev' does not
contain 'prod'
+ cicd_scope_id=context_with_environment_pattern.scope.domain_id()
+ )
+ assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected,
context_with_environment_pattern)
+
+
+def test_stage_record_collected():
+ """Test that Stage records are also collected (not just Job records)."""
+ context = (
+ ContextBuilder(AzureDevOpsPlugin())
+ .with_connection(token='token')
+ .with_scope_config(
+ deployment_pattern='deploy',
+ production_pattern='prod'
+ )
+ .with_scope('johndoe/test-repo',
url='https://github.com/johndoe/test-repo')
+ .build()
+ )
+
+ raw = {
+ 'previousAttempts': [],
+ 'id': 'stage-id-123',
+ 'parentId': None,
+ 'type': 'Stage', # This is a Stage record
+ 'name': 'deploy_prod_stage',
+ 'build_id': 'azuredevops:Build:1:12',
+ 'start_time': '2023-02-25T06:22:36.8066667Z',
+ 'finish_time': '2023-02-25T06:22:43.2333333Z',
+ 'currentOperation': None,
+ 'percentComplete': None,
+ 'state': 'completed',
+ 'result': 'succeeded',
+ 'resultCode': None,
+ 'changeId': 18,
+ 'lastModified': '0001-01-01T00:00:00',
+ 'workerName': None,
+ 'queueId': None,
+ 'order': 1,
+ 'details': None,
+ 'errorCount': 0,
+ 'warningCount': 0,
+ 'url': None,
+ 'log': None,
+ 'task': None,
+ 'attempt': 1,
+ 'identifier': 'deploy_prod_stage'
+ }
+
+ expected = devops.CICDTask(
+ id='stage-id-123',
+ name='deploy_prod_stage',
+ pipeline_id='azuredevops:Build:1:12',
+ status=devops.CICDStatus.DONE,
+ original_status='Completed',
+ original_result='Succeeded',
+ created_date='2023-02-25T06:22:36.8066667Z',
+ started_date='2023-02-25T06:22:36.8066667Z',
+ finished_date='2023-02-25T06:22:43.2333333Z',
+ result=devops.CICDResult.SUCCESS,
+ type=devops.CICDType.DEPLOYMENT,
+ duration_sec=6.426667213439941,
+ environment=devops.CICDEnvironment.PRODUCTION,
+ cicd_scope_id=context.scope.domain_id()
+ )
+ assert_stream_convert(AzureDevOpsPlugin, 'jobs', raw, expected, context)