This is an automated email from the ASF dual-hosted git repository.

abeizn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 2090fecd8 refactor: fetch branches before shallow fetch to reduce the total commits collected (#7760)
2090fecd8 is described below

commit 2090fecd8629daa14b129dc81d160c4f20e18c6e
Author: Klesh Wong <[email protected]>
AuthorDate: Thu Jul 18 15:50:57 2024 +0800

    refactor: fetch branches before shallow fetch to reduce the total commits collected (#7760)
---
 backend/plugins/gitextractor/impl/impl.go           |  8 +++++---
 backend/plugins/gitextractor/parser/clone_gitcli.go | 20 ++++++++++++--------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/backend/plugins/gitextractor/impl/impl.go 
b/backend/plugins/gitextractor/impl/impl.go
index cd96d8198..778e0e32d 100644
--- a/backend/plugins/gitextractor/impl/impl.go
+++ b/backend/plugins/gitextractor/impl/impl.go
@@ -108,9 +108,11 @@ func (p GitExtractor) PrepareTaskData(taskCtx 
plugin.TaskContext, options map[st
 
 func (p GitExtractor) Close(taskCtx plugin.TaskContext) errors.Error {
        if taskData, ok := taskCtx.GetData().(*parser.GitExtractorTaskData); ok 
{
-               if taskData.GitRepo != nil {
-                       if err := taskData.GitRepo.Close(taskCtx.GetContext()); 
err != nil {
-                               return errors.Convert(err)
+               if 
!taskCtx.GetConfigReader().GetBool("GIT_EXTRACTOR_KEEP_REPO") {
+                       if taskData.GitRepo != nil {
+                               if err := 
taskData.GitRepo.Close(taskCtx.GetContext()); err != nil {
+                                       return errors.Convert(err)
+                               }
                        }
                }
        }
diff --git a/backend/plugins/gitextractor/parser/clone_gitcli.go 
b/backend/plugins/gitextractor/parser/clone_gitcli.go
index 184dc26ff..bd7e7a7ab 100644
--- a/backend/plugins/gitextractor/parser/clone_gitcli.go
+++ b/backend/plugins/gitextractor/parser/clone_gitcli.go
@@ -124,13 +124,11 @@ func (g *GitcliCloner) execGitCloneCommand(ctx 
plugin.SubTaskContext, localDir s
                //    
https://stackoverflow.com/questions/23708231/git-shallow-clone-clone-depth-misses-remote-branches
 
                // 1. clone the repo with depth 1
-               if err := g.execGitCommand(ctx, "clone", taskData.Options.Url, 
localDir, "--depth=1", "--bare"); err != nil {
+               cloneArgs := append([]string{"clone", taskData.Options.Url, 
localDir, "--depth=1", "--bare"}, args...)
+               if err := g.execGitCommand(ctx, cloneArgs...); err != nil {
                        return err
                }
-               // 2. set remote for all branches
-               // if err := g.execGitCommandIn(ctx, localDir, "remote", 
"set-branches", "origin", "'*'"); err != nil {
-               // return err
-               // } // someshow it fails siliently on my local machine, don't 
know why
+               // 2. configure to fetch all branches from the remote server so 
we can collect new commits from them
                gitConfig, err := os.OpenFile(path.Join(localDir, "config"), 
os.O_APPEND|os.O_WRONLY, 0644)
                if err != nil {
                        return errors.Default.Wrap(err, "failed to open git 
config file")
@@ -139,14 +137,20 @@ func (g *GitcliCloner) execGitCloneCommand(ctx 
plugin.SubTaskContext, localDir s
                if err != nil {
                        return errors.Default.Wrap(err, "failed to write to git 
config file")
                }
-               // 3. fetch all new commits from all branches since the given 
time
-               args = append([]string{"fetch", "--progress", 
fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
+               // 3. fetch all branches with depth=1 so the next step would 
collect fewer commits
+               // (I don't know why, but it reduced the total number of commits 
from 18k to 7k on https://gitlab.com/gitlab-org/gitlab-foss.git with the same 
parameters)
+               fetchBranchesArgs := append([]string{"fetch", "--depth=1", 
"origin"}, args...)
+               if err := g.execGitCommandIn(ctx, localDir, 
fetchBranchesArgs...); err != nil {
+                       return errors.Default.Wrap(err, "failed to fetch all 
branches from the remote server")
+               }
+               // 4. fetch all new commits from all branches since the given 
time
+               args = append([]string{"fetch", 
fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
                if err := g.execGitCommandIn(ctx, localDir, args...); err != 
nil {
                        g.logger.Warn(err, "shallow fetch failed")
                }
                return nil
        } else {
-               args = append([]string{"clone", taskData.Options.Url, localDir, 
"--progress", "--bare"}, args...)
+               args = append([]string{"clone", taskData.Options.Url, localDir, 
"--bare"}, args...)
                return g.execGitCommand(ctx, args...)
        }
 }

Reply via email to