This is an automated email from the ASF dual-hosted git repository.
klesh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new 0192f69fb feat(gitextractor): add support for excluding file
extensions in commit stats (#8586)
0192f69fb is described below
commit 0192f69fb031a27356abee46aac049d35f5a7959
Author: Bamboo <[email protected]>
AuthorDate: Wed Sep 24 11:21:27 2025 +0800
feat(gitextractor): add support for excluding file extensions in commit
stats (#8586)
* fix(jira): update epic collector to use new API endpoint and include all
fields
* fix(jira): enhance epic collector to dynamically select API endpoint
based on JIRA version
* fix(jira): update epic collector to use correct API endpoint for JIRA
Cloud and Server versions
* fix(jira): refactor epic collector to streamline API endpoint selection
and enhance error handling
* fix(jira): fix type for Jira issue descriptions
* refactor(jira): update comment and worklog models to use
FlexibleDescription type for comments
* docs(jira): add ADF reference for FlexibleDescription type in issue model
* refactor(migrations): enhance file meta migration to check column
existence and nullability before modification
* feat(gitlab): add PR size exclusion for specified file extensions
---
backend/plugins/gitextractor/parser/repo_gogit.go | 49 +++++++++--
.../plugins/gitextractor/parser/repo_libgit2.go | 95 ++++++++++++++++++----
backend/plugins/gitextractor/parser/taskdata.go | 2 +
backend/plugins/gitlab/api/blueprint_v200.go | 25 +++---
.../20250921_add_pr_size_excluded_extensions.go | 50 ++++++++++++
.../gitlab/models/migrationscripts/register.go | 1 +
backend/plugins/gitlab/models/scope_config.go | 2 +
config-ui/src/plugins/register/gitlab/config.tsx | 1 +
.../src/plugins/register/gitlab/transformation.tsx | 30 +++++++
9 files changed, 226 insertions(+), 29 deletions(-)
diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go
b/backend/plugins/gitextractor/parser/repo_gogit.go
index baaa4e42b..21cf09cd9 100644
--- a/backend/plugins/gitextractor/parser/repo_gogit.go
+++ b/backend/plugins/gitextractor/parser/repo_gogit.go
@@ -23,6 +23,7 @@ import (
"encoding/hex"
"fmt"
"regexp"
+ "strings"
"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/errors"
@@ -220,9 +221,6 @@ func (r *GogitRepoCollector) CollectBranches(subtaskCtx
plugin.SubTaskContext) e
func(r *plumbing.Reference) bool {
return r.Name().IsBranch() || r.Name().IsRemote()
}, refIter)
- if err != nil {
- return err
- }
headRef, err := r.repo.Head()
if err != nil {
return err
@@ -336,7 +334,26 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx
plugin.SubTaskContext) (e
if err != nil {
return err
} else {
+ excluded := map[string]struct{}{}
+ for _, ext := range
taskOpts.ExcludeFileExtensions {
+ e :=
strings.ToLower(strings.TrimSpace(ext))
+ if e == "" {
+ continue
+ }
+ excluded[e] = struct{}{}
+ }
for _, stat := range stats {
+ nameLower := strings.ToLower(stat.Name)
+ skip := false
+ for ext := range excluded {
+ if strings.HasSuffix(nameLower,
ext) {
+ skip = true
+ break
+ }
+ }
+ if skip {
+ continue
+ }
codeCommit.Additions += stat.Addition
// In some repos, deletion may be zero,
which is different from git log --stat.
// It seems go-git doesn't get the
correct changes.
@@ -363,7 +380,7 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx
plugin.SubTaskContext) (e
return err
}
if !*taskOpts.SkipCommitFiles {
- if err :=
r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err
!= nil {
+ if err :=
r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit,
taskOpts.ExcludeFileExtensions); err != nil {
return err
}
}
@@ -423,7 +440,7 @@ func (r *GogitRepoCollector) getCurrentAndParentTree(ctx
context.Context, commit
return commitTree, firstParentTree, nil
}
-func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx
plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit
*object.Commit) (err error) {
+func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx
plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit
*object.Commit, excludeExts []string) (err error) {
commitTree, firstParentTree, err :=
r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit)
if err != nil {
return err
@@ -433,12 +450,34 @@ func (r *GogitRepoCollector)
storeDiffCommitFilesComparedToParent(subtaskCtx plu
if err != nil {
return err
}
+ // normalize exclusions
+ excluded := map[string]struct{}{}
+ for _, ext := range excludeExts {
+ e := strings.ToLower(strings.TrimSpace(ext))
+ if e == "" {
+ continue
+ }
+ excluded[e] = struct{}{}
+ }
for _, p := range patch.Stats() {
commitFile := &code.CommitFile{
CommitSha: commit.Hash.String(),
}
fileName := p.Name
commitFile.FilePath = fileName
+ if len(excluded) > 0 {
+ lower := strings.ToLower(fileName)
+ skip := false
+ for ext := range excluded {
+ if strings.HasSuffix(lower, ext) {
+ skip = true
+ break
+ }
+ }
+ if skip {
+ continue
+ }
+ }
commitFile.Id = genCommitFileId(commitFile.CommitSha, fileName)
commitFile.Deletions = p.Deletion
commitFile.Additions = p.Addition
diff --git a/backend/plugins/gitextractor/parser/repo_libgit2.go
b/backend/plugins/gitextractor/parser/repo_libgit2.go
index 78451feb0..69d82e253 100644
--- a/backend/plugins/gitextractor/parser/repo_libgit2.go
+++ b/backend/plugins/gitextractor/parser/repo_libgit2.go
@@ -25,6 +25,7 @@ import (
"regexp"
"sort"
"strconv"
+ "strings"
"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/errors"
@@ -317,12 +318,13 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx
plugin.SubTaskContext)
if !*taskOpts.SkipCommitStat {
var stats *git.DiffStats
- if stats, err = r.getDiffComparedToParent(taskOpts,
c.Sha, commit, parent, opts, componentMap); err != nil {
+ var addIncluded, delIncluded int
+ if stats, addIncluded, delIncluded, err =
r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap);
err != nil {
return err
}
r.logger.Debug("state: %#+v\n", stats.Deletions())
- c.Additions += stats.Insertions()
- c.Deletions += stats.Deletions()
+ c.Additions += addIncluded
+ c.Deletions += delIncluded
}
err = r.store.Commits(c)
@@ -358,39 +360,83 @@ func (r *Libgit2RepoCollector)
storeParentCommits(commitSha string, commit *git.
return r.store.CommitParents(commitParents)
}
-func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts
*GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit,
opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats,
errors.Error) {
+func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts
*GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit,
opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats,
int, int, errors.Error) {
var err error
var parentTree, tree *git.Tree
if parent != nil {
parentTree, err = parent.Tree()
}
if err != nil {
- return nil, errors.Convert(err)
+ return nil, 0, 0, errors.Convert(err)
}
tree, err = commit.Tree()
if err != nil {
- return nil, errors.Convert(err)
+ return nil, 0, 0, errors.Convert(err)
}
var diff *git.Diff
diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts)
if err != nil {
- return nil, errors.Convert(err)
+ return nil, 0, 0, errors.Convert(err)
+ }
+ // build excluded extension set
+ excluded := map[string]struct{}{}
+ for _, ext := range taskOpts.ExcludeFileExtensions {
+ e := strings.ToLower(strings.TrimSpace(ext))
+ if e == "" {
+ continue
+ }
+ excluded[e] = struct{}{}
}
if !*taskOpts.SkipCommitFiles {
- err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap)
+ err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap,
excluded)
if err != nil {
- return nil, errors.Convert(err)
+ return nil, 0, 0, errors.Convert(err)
}
}
var stats *git.DiffStats
stats, err = diff.Stats()
if err != nil {
- return nil, errors.Convert(err)
- }
- return stats, nil
+ return nil, 0, 0, errors.Convert(err)
+ }
+ // calculate included totals with exclusions
+ addIncluded := 0
+ delIncluded := 0
+ if len(excluded) == 0 {
+ addIncluded = stats.Insertions()
+ delIncluded = stats.Deletions()
+ return stats, addIncluded, delIncluded, nil
+ }
+ _ = diff.ForEach(func(file git.DiffDelta, progress float64)
(git.DiffForEachHunkCallback, error) {
+ // choose path to check based on delta status; for deletions
use old path
+ pathForCheck := file.NewFile.Path
+ if file.Status == git.DeltaDeleted || pathForCheck == "" {
+ pathForCheck = file.OldFile.Path
+ }
+ lower := strings.ToLower(pathForCheck)
+ for ext := range excluded {
+ if strings.HasSuffix(lower, ext) {
+ // skip all lines for excluded files
+ return func(hunk git.DiffHunk)
(git.DiffForEachLineCallback, error) {
+ return func(line git.DiffLine) error {
return nil }, nil
+ }, nil
+ }
+ }
+ return func(hunk git.DiffHunk) (git.DiffForEachLineCallback,
error) {
+ return func(line git.DiffLine) error {
+ if line.Origin == git.DiffLineAddition {
+ addIncluded += line.NumLines
+ }
+ if line.Origin == git.DiffLineDeletion {
+ delIncluded += line.NumLines
+ }
+ return nil
+ }, nil
+ }, nil
+ }, git.DiffDetailLines)
+ return stats, addIncluded, delIncluded, nil
}
-func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff
*git.Diff, componentMap map[string]*regexp.Regexp) errors.Error {
+func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff
*git.Diff, componentMap map[string]*regexp.Regexp, excluded
map[string]struct{}) errors.Error {
var commitFile *code.CommitFile
var commitFileComponent *code.CommitFileComponent
var err error
@@ -404,15 +450,36 @@ func (r *Libgit2RepoCollector)
storeCommitFilesFromDiff(commitSha string, diff *
}
}
+ // skip files by extension if configured
+ if len(excluded) > 0 {
+ pathForCheck := file.NewFile.Path
+ if file.Status == git.DeltaDeleted || pathForCheck ==
"" {
+ pathForCheck = file.OldFile.Path
+ }
+ lower := strings.ToLower(pathForCheck)
+ for ext := range excluded {
+ if strings.HasSuffix(lower, ext) {
+ // skip this file entirely
+ return func(hunk git.DiffHunk)
(git.DiffForEachLineCallback, error) {
+ return func(line git.DiffLine)
error { return nil }, nil
+ }, nil
+ }
+ }
+ }
+
commitFile = new(code.CommitFile)
commitFile.CommitSha = commitSha
+ // prefer new path; for deletions fall back to old path
commitFile.FilePath = file.NewFile.Path
+ if commitFile.FilePath == "" {
+ commitFile.FilePath = file.OldFile.Path
+ }
// With some long paths, varchar(255) was not enough for either the ID
// or file_path, so we use a hash to compress the path in the ID and
// extend the length of file_path.
// Use commitSha and the sha256 of FilePath to create id
shaFilePath := sha256.New()
- shaFilePath.Write([]byte(file.NewFile.Path))
+ shaFilePath.Write([]byte(commitFile.FilePath))
commitFile.Id = commitSha + ":" +
hex.EncodeToString(shaFilePath.Sum(nil))
commitFileComponent = new(code.CommitFileComponent)
diff --git a/backend/plugins/gitextractor/parser/taskdata.go
b/backend/plugins/gitextractor/parser/taskdata.go
index 8dccf5ffe..bdfdbd2ae 100644
--- a/backend/plugins/gitextractor/parser/taskdata.go
+++ b/backend/plugins/gitextractor/parser/taskdata.go
@@ -47,4 +47,6 @@ type GitExtractorOptions struct {
NoShallowClone bool `json:"noShallowClone"
mapstructure:"noShallowClone"`
ConnectionId uint64 `json:"connectionId"
mapstructure:"connectionId,omitempty"`
PluginName string `json:"pluginName"
mapstructure:"pluginName,omitempty"`
+ // Configured by upstream plugin (e.g., GitLab) to exclude file
extensions from commit stats
+ ExcludeFileExtensions []string `json:"excludeFileExtensions"
mapstructure:"excludeFileExtensions"`
}
diff --git a/backend/plugins/gitlab/api/blueprint_v200.go
b/backend/plugins/gitlab/api/blueprint_v200.go
index b891f72b3..dbe14905d 100644
--- a/backend/plugins/gitlab/api/blueprint_v200.go
+++ b/backend/plugins/gitlab/api/blueprint_v200.go
@@ -132,17 +132,22 @@ func makePipelinePlanV200(
return nil, err
}
cloneUrl.User = url.UserPassword("git",
connection.Token)
+ gitextOpts := map[string]interface{}{
+ "url": cloneUrl.String(),
+ "name": gitlabProject.Name,
+ "fullName": gitlabProject.PathWithNamespace,
+ "repoId":
didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID,
gitlabProject.GitlabId),
+ "proxy": connection.Proxy,
+ "connectionId": gitlabProject.ConnectionId,
+ "pluginName": "gitlab",
+ }
+ if len(scopeConfig.PrSizeExcludedFileExtensions) > 0 {
+ // pass excluded file extensions to
gitextractor to support PR Size exclusion
+ gitextOpts["excludeFileExtensions"] =
scopeConfig.PrSizeExcludedFileExtensions
+ }
stage = append(stage, &coreModels.PipelineTask{
- Plugin: "gitextractor",
- Options: map[string]interface{}{
- "url": cloneUrl.String(),
- "name": gitlabProject.Name,
- "fullName":
gitlabProject.PathWithNamespace,
- "repoId":
didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID,
gitlabProject.GitlabId),
- "proxy": connection.Proxy,
- "connectionId":
gitlabProject.ConnectionId,
- "pluginName": "gitlab",
- },
+ Plugin: "gitextractor",
+ Options: gitextOpts,
})
}
diff --git
a/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go
b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go
new file mode 100644
index 000000000..a0971e38a
--- /dev/null
+++
b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go
@@ -0,0 +1,50 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+ "github.com/apache/incubator-devlake/core/context"
+ "github.com/apache/incubator-devlake/core/errors"
+ "github.com/apache/incubator-devlake/core/plugin"
+ "github.com/apache/incubator-devlake/helpers/migrationhelper"
+)
+
+var _ plugin.MigrationScript = (*addPrSizeExcludedFileExtensions)(nil)
+
+type gitlabScopeConfig20250921 struct {
+ PrSizeExcludedFileExtensions []string `gorm:"type:json"
json:"prSizeExcludedFileExtensions" mapstructure:"prSizeExcludedFileExtensions"`
+}
+
+func (gitlabScopeConfig20250921) TableName() string {
+ return "_tool_gitlab_scope_configs"
+}
+
+type addPrSizeExcludedFileExtensions struct{}
+
+func (script *addPrSizeExcludedFileExtensions) Up(basicRes context.BasicRes)
errors.Error {
+ return migrationhelper.AutoMigrateTables(
+ basicRes,
+ &gitlabScopeConfig20250921{},
+ )
+}
+
+func (*addPrSizeExcludedFileExtensions) Version() uint64 { return
20250921100000 }
+
+func (*addPrSizeExcludedFileExtensions) Name() string {
+ return "add pr_size_excluded_file_extensions to
_tool_gitlab_scope_configs"
+}
diff --git a/backend/plugins/gitlab/models/migrationscripts/register.go
b/backend/plugins/gitlab/models/migrationscripts/register.go
index 1d89b2505..30a76f63e 100644
--- a/backend/plugins/gitlab/models/migrationscripts/register.go
+++ b/backend/plugins/gitlab/models/migrationscripts/register.go
@@ -52,5 +52,6 @@ func All() []plugin.MigrationScript {
new(addGitlabAssigneeAndReviewerPrimaryKey),
new(changeIssueComponentType),
new(addIsChildToPipelines240906),
+ new(addPrSizeExcludedFileExtensions),
}
}
diff --git a/backend/plugins/gitlab/models/scope_config.go
b/backend/plugins/gitlab/models/scope_config.go
index 78cfd7f2d..525720c32 100644
--- a/backend/plugins/gitlab/models/scope_config.go
+++ b/backend/plugins/gitlab/models/scope_config.go
@@ -37,6 +37,8 @@ type GitlabScopeConfig struct {
ProductionPattern string
`mapstructure:"productionPattern,omitempty" json:"productionPattern"
gorm:"type:varchar(255)"`
EnvNamePattern string
`mapstructure:"envNamePattern,omitempty" json:"envNamePattern"
gorm:"type:varchar(255)"`
Refdiff datatypes.JSONMap
`mapstructure:"refdiff,omitempty" json:"refdiff" swaggertype:"object"
format:"json"`
+ // A list of file extensions to exclude when calculating PR Size
(affects commit additions/deletions used by dashboards)
+ PrSizeExcludedFileExtensions []string
`mapstructure:"prSizeExcludedFileExtensions"
json:"prSizeExcludedFileExtensions" gorm:"type:json;serializer:json"`
}
func (t GitlabScopeConfig) TableName() string {
diff --git a/config-ui/src/plugins/register/gitlab/config.tsx
b/config-ui/src/plugins/register/gitlab/config.tsx
index 9caf5260a..b8924d7f7 100644
--- a/config-ui/src/plugins/register/gitlab/config.tsx
+++ b/config-ui/src/plugins/register/gitlab/config.tsx
@@ -78,6 +78,7 @@ export const GitLabConfig: IPluginConfig = {
envNamePattern: '(?i)prod(.*)',
deploymentPattern: '',
productionPattern: '',
+ prSizeExcludedFileExtensions: [],
},
},
};
diff --git a/config-ui/src/plugins/register/gitlab/transformation.tsx
b/config-ui/src/plugins/register/gitlab/transformation.tsx
index 3ad9b97c6..2405e6614 100644
--- a/config-ui/src/plugins/register/gitlab/transformation.tsx
+++ b/config-ui/src/plugins/register/gitlab/transformation.tsx
@@ -178,4 +178,34 @@ const renderCollapseItems = ({
</>
),
},
+ {
+ key: 'CODEREVIEW',
+ label: 'Code Review',
+ style: panelStyle,
+ children: (
+ <>
+ <h3 style={{ marginBottom: 16 }}>
+ <span>PR Size Exclusions</span>
+ </h3>
+ <div style={{ margin: '8px 0' }}>
+ <span>Exclude file extensions (comma-separated, e.g.
.md,.txt,.json)</span>
+ <Input
+ style={{ width: 360, margin: '0 8px' }}
+ placeholder=".md,.txt,.json"
+ value={(transformation.prSizeExcludedFileExtensions ||
[]).join(',')}
+ onChange={(e) =>
+ onChangeTransformation({
+ ...transformation,
+ prSizeExcludedFileExtensions: e.target.value
+ .split(',')
+ .map((s: string) => s.trim())
+ .filter((s: string) => s),
+ })
+ }
+ />
+ <HelpTooltip content="These extensions are ignored when computing
PR Size (additions/deletions)." />
+ </div>
+ </>
+ ),
+ },
].filter((it) => entities.includes(it.key));