This is an automated email from the ASF dual-hosted git repository.

klesh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 0192f69fb feat(gitextractor): add support for excluding file 
extensions in commit stats (#8586)
0192f69fb is described below

commit 0192f69fb031a27356abee46aac049d35f5a7959
Author: Bamboo <[email protected]>
AuthorDate: Wed Sep 24 11:21:27 2025 +0800

    feat(gitextractor): add support for excluding file extensions in commit 
stats (#8586)
    
    * fix(jira): update epic collector to use new API endpoint and include all 
fields
    
    * fix(jira): enhance epic collector to dynamically select API endpoint 
based on JIRA version
    
    * fix(jira): update epic collector to use correct API endpoint for JIRA 
Cloud and Server versions
    
    * fix(jira): refactor epic collector to streamline API endpoint selection 
and enhance error handling
    
    * fix(jira): fix type for Jira issue descriptions
    
    * refactor(jira): update comment and worklog models to use 
FlexibleDescription type for comments
    
    * docs(jira): add ADF reference for FlexibleDescription type in issue model
    
    * refactor(migrations): enhance file meta migration to check column 
existence and nullability before modification
    
    * feat(gitlab): add PR size exclusion for specified file extensions
---
 backend/plugins/gitextractor/parser/repo_gogit.go  | 49 +++++++++--
 .../plugins/gitextractor/parser/repo_libgit2.go    | 95 ++++++++++++++++++----
 backend/plugins/gitextractor/parser/taskdata.go    |  2 +
 backend/plugins/gitlab/api/blueprint_v200.go       | 25 +++---
 .../20250921_add_pr_size_excluded_extensions.go    | 50 ++++++++++++
 .../gitlab/models/migrationscripts/register.go     |  1 +
 backend/plugins/gitlab/models/scope_config.go      |  2 +
 config-ui/src/plugins/register/gitlab/config.tsx   |  1 +
 .../src/plugins/register/gitlab/transformation.tsx | 30 +++++++
 9 files changed, 226 insertions(+), 29 deletions(-)

diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go 
b/backend/plugins/gitextractor/parser/repo_gogit.go
index baaa4e42b..21cf09cd9 100644
--- a/backend/plugins/gitextractor/parser/repo_gogit.go
+++ b/backend/plugins/gitextractor/parser/repo_gogit.go
@@ -23,6 +23,7 @@ import (
        "encoding/hex"
        "fmt"
        "regexp"
+       "strings"
 
        "github.com/apache/incubator-devlake/core/dal"
        "github.com/apache/incubator-devlake/core/errors"
@@ -220,9 +221,6 @@ func (r *GogitRepoCollector) CollectBranches(subtaskCtx 
plugin.SubTaskContext) e
                func(r *plumbing.Reference) bool {
                        return r.Name().IsBranch() || r.Name().IsRemote()
                }, refIter)
-       if err != nil {
-               return err
-       }
        headRef, err := r.repo.Head()
        if err != nil {
                return err
@@ -336,7 +334,26 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx 
plugin.SubTaskContext) (e
                        if err != nil {
                                return err
                        } else {
+                               excluded := map[string]struct{}{}
+                               for _, ext := range 
taskOpts.ExcludeFileExtensions {
+                                       e := 
strings.ToLower(strings.TrimSpace(ext))
+                                       if e == "" {
+                                               continue
+                                       }
+                                       excluded[e] = struct{}{}
+                               }
                                for _, stat := range stats {
+                                       nameLower := strings.ToLower(stat.Name)
+                                       skip := false
+                                       for ext := range excluded {
+                                               if strings.HasSuffix(nameLower, 
ext) {
+                                                       skip = true
+                                                       break
+                                               }
+                                       }
+                                       if skip {
+                                               continue
+                                       }
                                        codeCommit.Additions += stat.Addition
                                        // In some repos, deletion may be zero, 
which is different from git log --stat.
                                        // It seems go-git doesn't get the 
correct changes.
@@ -363,7 +380,7 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx 
plugin.SubTaskContext) (e
                        return err
                }
                if !*taskOpts.SkipCommitFiles {
-                       if err := 
r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err 
!= nil {
+                       if err := 
r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit, 
taskOpts.ExcludeFileExtensions); err != nil {
                                return err
                        }
                }
@@ -423,7 +440,7 @@ func (r *GogitRepoCollector) getCurrentAndParentTree(ctx 
context.Context, commit
        return commitTree, firstParentTree, nil
 }
 
-func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx 
plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit 
*object.Commit) (err error) {
+func (r *GogitRepoCollector) storeDiffCommitFilesComparedToParent(subtaskCtx 
plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit 
*object.Commit, excludeExts []string) (err error) {
        commitTree, firstParentTree, err := 
r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit)
        if err != nil {
                return err
@@ -433,12 +450,34 @@ func (r *GogitRepoCollector) 
storeDiffCommitFilesComparedToParent(subtaskCtx plu
        if err != nil {
                return err
        }
+       // normalize exclusions
+       excluded := map[string]struct{}{}
+       for _, ext := range excludeExts {
+               e := strings.ToLower(strings.TrimSpace(ext))
+               if e == "" {
+                       continue
+               }
+               excluded[e] = struct{}{}
+       }
        for _, p := range patch.Stats() {
                commitFile := &code.CommitFile{
                        CommitSha: commit.Hash.String(),
                }
                fileName := p.Name
                commitFile.FilePath = fileName
+               if len(excluded) > 0 {
+                       lower := strings.ToLower(fileName)
+                       skip := false
+                       for ext := range excluded {
+                               if strings.HasSuffix(lower, ext) {
+                                       skip = true
+                                       break
+                               }
+                       }
+                       if skip {
+                               continue
+                       }
+               }
                commitFile.Id = genCommitFileId(commitFile.CommitSha, fileName)
                commitFile.Deletions = p.Deletion
                commitFile.Additions = p.Addition
diff --git a/backend/plugins/gitextractor/parser/repo_libgit2.go 
b/backend/plugins/gitextractor/parser/repo_libgit2.go
index 78451feb0..69d82e253 100644
--- a/backend/plugins/gitextractor/parser/repo_libgit2.go
+++ b/backend/plugins/gitextractor/parser/repo_libgit2.go
@@ -25,6 +25,7 @@ import (
        "regexp"
        "sort"
        "strconv"
+       "strings"
 
        "github.com/apache/incubator-devlake/core/dal"
        "github.com/apache/incubator-devlake/core/errors"
@@ -317,12 +318,13 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx 
plugin.SubTaskContext)
 
                if !*taskOpts.SkipCommitStat {
                        var stats *git.DiffStats
-                       if stats, err = r.getDiffComparedToParent(taskOpts, 
c.Sha, commit, parent, opts, componentMap); err != nil {
+                       var addIncluded, delIncluded int
+                       if stats, addIncluded, delIncluded, err = 
r.getDiffComparedToParent(taskOpts, c.Sha, commit, parent, opts, componentMap); 
err != nil {
                                return err
                        }
                        r.logger.Debug("state: %#+v\n", stats.Deletions())
-                       c.Additions += stats.Insertions()
-                       c.Deletions += stats.Deletions()
+                       c.Additions += addIncluded
+                       c.Deletions += delIncluded
                }
 
                err = r.store.Commits(c)
@@ -358,39 +360,83 @@ func (r *Libgit2RepoCollector) 
storeParentCommits(commitSha string, commit *git.
        return r.store.CommitParents(commitParents)
 }
 
-func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts 
*GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, 
opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, 
errors.Error) {
+func (r *Libgit2RepoCollector) getDiffComparedToParent(taskOpts 
*GitExtractorOptions, commitSha string, commit *git.Commit, parent *git.Commit, 
opts *git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, 
int, int, errors.Error) {
        var err error
        var parentTree, tree *git.Tree
        if parent != nil {
                parentTree, err = parent.Tree()
        }
        if err != nil {
-               return nil, errors.Convert(err)
+               return nil, 0, 0, errors.Convert(err)
        }
        tree, err = commit.Tree()
        if err != nil {
-               return nil, errors.Convert(err)
+               return nil, 0, 0, errors.Convert(err)
        }
        var diff *git.Diff
        diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts)
        if err != nil {
-               return nil, errors.Convert(err)
+               return nil, 0, 0, errors.Convert(err)
+       }
+       // build excluded extension set
+       excluded := map[string]struct{}{}
+       for _, ext := range taskOpts.ExcludeFileExtensions {
+               e := strings.ToLower(strings.TrimSpace(ext))
+               if e == "" {
+                       continue
+               }
+               excluded[e] = struct{}{}
        }
        if !*taskOpts.SkipCommitFiles {
-               err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap)
+               err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap, 
excluded)
                if err != nil {
-                       return nil, errors.Convert(err)
+                       return nil, 0, 0, errors.Convert(err)
                }
        }
        var stats *git.DiffStats
        stats, err = diff.Stats()
        if err != nil {
-               return nil, errors.Convert(err)
-       }
-       return stats, nil
+               return nil, 0, 0, errors.Convert(err)
+       }
+       // calculate included totals with exclusions
+       addIncluded := 0
+       delIncluded := 0
+       if len(excluded) == 0 {
+               addIncluded = stats.Insertions()
+               delIncluded = stats.Deletions()
+               return stats, addIncluded, delIncluded, nil
+       }
+       _ = diff.ForEach(func(file git.DiffDelta, progress float64) 
(git.DiffForEachHunkCallback, error) {
+               // choose path to check based on delta status; for deletions 
use old path
+               pathForCheck := file.NewFile.Path
+               if file.Status == git.DeltaDeleted || pathForCheck == "" {
+                       pathForCheck = file.OldFile.Path
+               }
+               lower := strings.ToLower(pathForCheck)
+               for ext := range excluded {
+                       if strings.HasSuffix(lower, ext) {
+                               // skip all lines for excluded files
+                               return func(hunk git.DiffHunk) 
(git.DiffForEachLineCallback, error) {
+                                       return func(line git.DiffLine) error { 
return nil }, nil
+                               }, nil
+                       }
+               }
+               return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, 
error) {
+                       return func(line git.DiffLine) error {
+                               if line.Origin == git.DiffLineAddition {
+                                       addIncluded += line.NumLines
+                               }
+                               if line.Origin == git.DiffLineDeletion {
+                                       delIncluded += line.NumLines
+                               }
+                               return nil
+                       }, nil
+               }, nil
+       }, git.DiffDetailLines)
+       return stats, addIncluded, delIncluded, nil
 }
 
-func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff 
*git.Diff, componentMap map[string]*regexp.Regexp) errors.Error {
+func (r *Libgit2RepoCollector) storeCommitFilesFromDiff(commitSha string, diff 
*git.Diff, componentMap map[string]*regexp.Regexp, excluded 
map[string]struct{}) errors.Error {
        var commitFile *code.CommitFile
        var commitFileComponent *code.CommitFileComponent
        var err error
@@ -404,15 +450,36 @@ func (r *Libgit2RepoCollector) 
storeCommitFilesFromDiff(commitSha string, diff *
                        }
                }
 
+               // skip files by extension if configured
+               if len(excluded) > 0 {
+                       pathForCheck := file.NewFile.Path
+                       if file.Status == git.DeltaDeleted || pathForCheck == 
"" {
+                               pathForCheck = file.OldFile.Path
+                       }
+                       lower := strings.ToLower(pathForCheck)
+                       for ext := range excluded {
+                               if strings.HasSuffix(lower, ext) {
+                                       // skip this file entirely
+                                       return func(hunk git.DiffHunk) 
(git.DiffForEachLineCallback, error) {
+                                               return func(line git.DiffLine) 
error { return nil }, nil
+                                       }, nil
+                               }
+                       }
+               }
+
                commitFile = new(code.CommitFile)
                commitFile.CommitSha = commitSha
+               // prefer new path; for deletions fall back to old path
                commitFile.FilePath = file.NewFile.Path
+               if commitFile.FilePath == "" {
+                       commitFile.FilePath = file.OldFile.Path
+               }
 
                // With some long path,the varchar(255) was not enough both ID 
and file_path
                // So we use the hash to compress the path in ID and add length 
of file_path.
                // Use commitSha and the sha256 of FilePath to create id
                shaFilePath := sha256.New()
-               shaFilePath.Write([]byte(file.NewFile.Path))
+               shaFilePath.Write([]byte(commitFile.FilePath))
                commitFile.Id = commitSha + ":" + 
hex.EncodeToString(shaFilePath.Sum(nil))
 
                commitFileComponent = new(code.CommitFileComponent)
diff --git a/backend/plugins/gitextractor/parser/taskdata.go 
b/backend/plugins/gitextractor/parser/taskdata.go
index 8dccf5ffe..bdfdbd2ae 100644
--- a/backend/plugins/gitextractor/parser/taskdata.go
+++ b/backend/plugins/gitextractor/parser/taskdata.go
@@ -47,4 +47,6 @@ type GitExtractorOptions struct {
        NoShallowClone        bool   `json:"noShallowClone" 
mapstructure:"noShallowClone"`
        ConnectionId          uint64 `json:"connectionId" 
mapstructure:"connectionId,omitempty"`
        PluginName            string `json:"pluginName" 
mapstructure:"pluginName,omitempty"`
+       // Configured by upstream plugin (e.g., GitLab) to exclude file 
extensions from commit stats
+       ExcludeFileExtensions []string `json:"excludeFileExtensions" 
mapstructure:"excludeFileExtensions"`
 }
diff --git a/backend/plugins/gitlab/api/blueprint_v200.go 
b/backend/plugins/gitlab/api/blueprint_v200.go
index b891f72b3..dbe14905d 100644
--- a/backend/plugins/gitlab/api/blueprint_v200.go
+++ b/backend/plugins/gitlab/api/blueprint_v200.go
@@ -132,17 +132,22 @@ func makePipelinePlanV200(
                                return nil, err
                        }
                        cloneUrl.User = url.UserPassword("git", 
connection.Token)
+                       gitextOpts := map[string]interface{}{
+                               "url":          cloneUrl.String(),
+                               "name":         gitlabProject.Name,
+                               "fullName":     gitlabProject.PathWithNamespace,
+                               "repoId":       
didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, 
gitlabProject.GitlabId),
+                               "proxy":        connection.Proxy,
+                               "connectionId": gitlabProject.ConnectionId,
+                               "pluginName":   "gitlab",
+                       }
+                       if len(scopeConfig.PrSizeExcludedFileExtensions) > 0 {
+                               // pass excluded file extensions to 
gitextractor to support PR Size exclusion
+                               gitextOpts["excludeFileExtensions"] = 
scopeConfig.PrSizeExcludedFileExtensions
+                       }
                        stage = append(stage, &coreModels.PipelineTask{
-                               Plugin: "gitextractor",
-                               Options: map[string]interface{}{
-                                       "url":          cloneUrl.String(),
-                                       "name":         gitlabProject.Name,
-                                       "fullName":     
gitlabProject.PathWithNamespace,
-                                       "repoId":       
didgen.NewDomainIdGenerator(&models.GitlabProject{}).Generate(connection.ID, 
gitlabProject.GitlabId),
-                                       "proxy":        connection.Proxy,
-                                       "connectionId": 
gitlabProject.ConnectionId,
-                                       "pluginName":   "gitlab",
-                               },
+                               Plugin:  "gitextractor",
+                               Options: gitextOpts,
                        })
                }
 
diff --git 
a/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go
 
b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go
new file mode 100644
index 000000000..a0971e38a
--- /dev/null
+++ 
b/backend/plugins/gitlab/models/migrationscripts/20250921_add_pr_size_excluded_extensions.go
@@ -0,0 +1,50 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package migrationscripts
+
+import (
+       "github.com/apache/incubator-devlake/core/context"
+       "github.com/apache/incubator-devlake/core/errors"
+       "github.com/apache/incubator-devlake/core/plugin"
+       "github.com/apache/incubator-devlake/helpers/migrationhelper"
+)
+
+var _ plugin.MigrationScript = (*addPrSizeExcludedFileExtensions)(nil)
+
+type gitlabScopeConfig20250921 struct {
+       PrSizeExcludedFileExtensions []string `gorm:"type:json" 
json:"prSizeExcludedFileExtensions" mapstructure:"prSizeExcludedFileExtensions"`
+}
+
+func (gitlabScopeConfig20250921) TableName() string {
+       return "_tool_gitlab_scope_configs"
+}
+
+type addPrSizeExcludedFileExtensions struct{}
+
+func (script *addPrSizeExcludedFileExtensions) Up(basicRes context.BasicRes) 
errors.Error {
+       return migrationhelper.AutoMigrateTables(
+               basicRes,
+               &gitlabScopeConfig20250921{},
+       )
+}
+
+func (*addPrSizeExcludedFileExtensions) Version() uint64 { return 
20250921100000 }
+
+func (*addPrSizeExcludedFileExtensions) Name() string {
+       return "add pr_size_excluded_file_extensions to 
_tool_gitlab_scope_configs"
+}
diff --git a/backend/plugins/gitlab/models/migrationscripts/register.go 
b/backend/plugins/gitlab/models/migrationscripts/register.go
index 1d89b2505..30a76f63e 100644
--- a/backend/plugins/gitlab/models/migrationscripts/register.go
+++ b/backend/plugins/gitlab/models/migrationscripts/register.go
@@ -52,5 +52,6 @@ func All() []plugin.MigrationScript {
                new(addGitlabAssigneeAndReviewerPrimaryKey),
                new(changeIssueComponentType),
                new(addIsChildToPipelines240906),
+               new(addPrSizeExcludedFileExtensions),
        }
 }
diff --git a/backend/plugins/gitlab/models/scope_config.go 
b/backend/plugins/gitlab/models/scope_config.go
index 78cfd7f2d..525720c32 100644
--- a/backend/plugins/gitlab/models/scope_config.go
+++ b/backend/plugins/gitlab/models/scope_config.go
@@ -37,6 +37,8 @@ type GitlabScopeConfig struct {
        ProductionPattern    string            
`mapstructure:"productionPattern,omitempty" json:"productionPattern" 
gorm:"type:varchar(255)"`
        EnvNamePattern       string            
`mapstructure:"envNamePattern,omitempty" json:"envNamePattern" 
gorm:"type:varchar(255)"`
        Refdiff              datatypes.JSONMap 
`mapstructure:"refdiff,omitempty" json:"refdiff" swaggertype:"object" 
format:"json"`
+       // A list of file extensions to exclude when calculating PR Size 
(affects commit additions/deletions used by dashboards)
+       PrSizeExcludedFileExtensions []string 
`mapstructure:"prSizeExcludedFileExtensions" 
json:"prSizeExcludedFileExtensions" gorm:"type:json;serializer:json"`
 }
 
 func (t GitlabScopeConfig) TableName() string {
diff --git a/config-ui/src/plugins/register/gitlab/config.tsx 
b/config-ui/src/plugins/register/gitlab/config.tsx
index 9caf5260a..b8924d7f7 100644
--- a/config-ui/src/plugins/register/gitlab/config.tsx
+++ b/config-ui/src/plugins/register/gitlab/config.tsx
@@ -78,6 +78,7 @@ export const GitLabConfig: IPluginConfig = {
       envNamePattern: '(?i)prod(.*)',
       deploymentPattern: '',
       productionPattern: '',
+      prSizeExcludedFileExtensions: [],
     },
   },
 };
diff --git a/config-ui/src/plugins/register/gitlab/transformation.tsx 
b/config-ui/src/plugins/register/gitlab/transformation.tsx
index 3ad9b97c6..2405e6614 100644
--- a/config-ui/src/plugins/register/gitlab/transformation.tsx
+++ b/config-ui/src/plugins/register/gitlab/transformation.tsx
@@ -178,4 +178,34 @@ const renderCollapseItems = ({
         </>
       ),
     },
+    {
+      key: 'CODEREVIEW',
+      label: 'Code Review',
+      style: panelStyle,
+      children: (
+        <>
+          <h3 style={{ marginBottom: 16 }}>
+            <span>PR Size Exclusions</span>
+          </h3>
+          <div style={{ margin: '8px 0' }}>
+            <span>Exclude file extensions (comma-separated, e.g. 
.md,.txt,.json)</span>
+            <Input
+              style={{ width: 360, margin: '0 8px' }}
+              placeholder=".md,.txt,.json"
+              value={(transformation.prSizeExcludedFileExtensions || 
[]).join(',')}
+              onChange={(e) =>
+                onChangeTransformation({
+                  ...transformation,
+                  prSizeExcludedFileExtensions: e.target.value
+                    .split(',')
+                    .map((s: string) => s.trim())
+                    .filter((s: string) => s),
+                })
+              }
+            />
+            <HelpTooltip content="These extensions are ignored when computing 
PR Size (additions/deletions)." />
+          </div>
+        </>
+      ),
+    },
   ].filter((it) => entities.includes(it.key));

Reply via email to