This is an automated email from the ASF dual-hosted git repository.
abeizn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new 2090fecd8 refactor: fetch branches before shallow fetch to reduce the total commits collected (#7760)
2090fecd8 is described below
commit 2090fecd8629daa14b129dc81d160c4f20e18c6e
Author: Klesh Wong <[email protected]>
AuthorDate: Thu Jul 18 15:50:57 2024 +0800
refactor: fetch branches before shallow fetch to reduce the total commits collected (#7760)
---
backend/plugins/gitextractor/impl/impl.go | 8 +++++---
backend/plugins/gitextractor/parser/clone_gitcli.go | 20 ++++++++++++--------
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/backend/plugins/gitextractor/impl/impl.go b/backend/plugins/gitextractor/impl/impl.go
index cd96d8198..778e0e32d 100644
--- a/backend/plugins/gitextractor/impl/impl.go
+++ b/backend/plugins/gitextractor/impl/impl.go
@@ -108,9 +108,11 @@ func (p GitExtractor) PrepareTaskData(taskCtx plugin.TaskContext, options map[st
func (p GitExtractor) Close(taskCtx plugin.TaskContext) errors.Error {
if taskData, ok := taskCtx.GetData().(*parser.GitExtractorTaskData); ok {
- if taskData.GitRepo != nil {
- if err := taskData.GitRepo.Close(taskCtx.GetContext()); err != nil {
- return errors.Convert(err)
+ if !taskCtx.GetConfigReader().GetBool("GIT_EXTRACTOR_KEEP_REPO") {
+ if taskData.GitRepo != nil {
+ if err := taskData.GitRepo.Close(taskCtx.GetContext()); err != nil {
+ return errors.Convert(err)
+ }
}
}
}
diff --git a/backend/plugins/gitextractor/parser/clone_gitcli.go b/backend/plugins/gitextractor/parser/clone_gitcli.go
index 184dc26ff..bd7e7a7ab 100644
--- a/backend/plugins/gitextractor/parser/clone_gitcli.go
+++ b/backend/plugins/gitextractor/parser/clone_gitcli.go
@@ -124,13 +124,11 @@ func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir s
// https://stackoverflow.com/questions/23708231/git-shallow-clone-clone-depth-misses-remote-branches
// 1. clone the repo with depth 1
- if err := g.execGitCommand(ctx, "clone", taskData.Options.Url, localDir, "--depth=1", "--bare"); err != nil {
+ cloneArgs := append([]string{"clone", taskData.Options.Url, localDir, "--depth=1", "--bare"}, args...)
+ if err := g.execGitCommand(ctx, cloneArgs...); err != nil {
return err
}
- // 2. set remote for all branches
- // if err := g.execGitCommandIn(ctx, localDir, "remote", "set-branches", "origin", "'*'"); err != nil {
- // return err
- // } // someshow it fails siliently on my local machine, don't know why
+ // 2. configure to fetch all branches from the remote server so we can collect new commits from them
gitConfig, err := os.OpenFile(path.Join(localDir, "config"), os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return errors.Default.Wrap(err, "failed to open git config file")
@@ -139,14 +137,20 @@ func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir s
if err != nil {
return errors.Default.Wrap(err, "failed to write to git config file")
}
- // 3. fetch all new commits from all branches since the given time
- args = append([]string{"fetch", "--progress", fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
+ // 3. fetch all branches with depth=1 so the next step would collect less commits
+ // (I don't know why, but it reduced total number of commits from 18k to 7k on https://gitlab.com/gitlab-org/gitlab-foss.git with the same parameters)
+ fetchBranchesArgs := append([]string{"fetch", "--depth=1", "origin"}, args...)
+ if err := g.execGitCommandIn(ctx, localDir, fetchBranchesArgs...); err != nil {
+ return errors.Default.Wrap(err, "failed to fetch all branches from the remote server")
+ }
+ // 4. fetch all new commits from all branches since the given time
+ args = append([]string{"fetch", fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
if err := g.execGitCommandIn(ctx, localDir, args...); err != nil {
g.logger.Warn(err, "shallow fetch failed")
}
return nil
} else {
- args = append([]string{"clone", taskData.Options.Url, localDir, "--progress", "--bare"}, args...)
+ args = append([]string{"clone", taskData.Options.Url, localDir, "--bare"}, args...)
return g.execGitCommand(ctx, args...)
}
}