This is an automated email from the ASF dual-hosted git repository.
abeizn pushed a commit to branch release-v1.0
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/release-v1.0 by this push:
new 692a2abb8 fix: addition of some commits are ridiculous huge (#7719)
(#7720)
692a2abb8 is described below
commit 692a2abb855b390a5a64fea4e1ed0d14f4f13205
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Jul 11 16:05:06 2024 +0800
fix: addition of some commits are ridiculous huge (#7719) (#7720)
Co-authored-by: Klesh Wong <[email protected]>
---
.../plugins/gitextractor/parser/clone_gitcli.go | 50 +++++++++++-----------
backend/plugins/gitextractor/parser/repo_gogit.go | 14 ++++++
.../plugins/gitextractor/parser/repo_libgit2.go | 15 +++++--
3 files changed, 49 insertions(+), 30 deletions(-)
diff --git a/backend/plugins/gitextractor/parser/clone_gitcli.go
b/backend/plugins/gitextractor/parser/clone_gitcli.go
index 9f2ca7ad5..89a004239 100644
--- a/backend/plugins/gitextractor/parser/clone_gitcli.go
+++ b/backend/plugins/gitextractor/parser/clone_gitcli.go
@@ -89,11 +89,7 @@ func (g *GitcliCloner) CloneRepo(ctx plugin.SubTaskContext,
localDir string) err
}
- cmd, err := g.buildCloneCommand(ctx, localDir, since)
- if err != nil {
- return err
- }
- err = g.execCloneCommand(cmd)
+ err := g.execGitCloneCommand(ctx, localDir, since)
if err != nil {
return err
}
@@ -101,13 +97,11 @@ func (g *GitcliCloner) CloneRepo(ctx
plugin.SubTaskContext, localDir string) err
if since != nil {
// fixes error described on
https://stackoverflow.com/questions/63878612/git-fatal-error-in-object-unshallow-sha-1
// It might be casued by the commit which being deepen has
mulitple parent(e.g. a merge commit), not sure.
- repackCmd := exec.CommandContext(ctx.GetContext(), "git", "-C",
localDir, "repack", "-d")
- if err := repackCmd.Run(); err != nil {
+ if err := g.execGitCommand(ctx, "-C", localDir, "repack",
"-d"); err != nil {
return errors.Default.Wrap(err, "failed to repack the
repo")
}
- deepenCmd := exec.CommandContext(ctx.GetContext(), "git", "-C",
localDir, "fetch", "--deepen=1")
// deepen would fail on a EMPTY repo, ignore the error
- if err := deepenCmd.Run(); err != nil {
+ if err := g.execGitCommand(ctx, "-C", localDir, "fetch",
"--deepen=1"); err != nil {
g.logger.Error(err, "failed to deepen the cloned repo")
}
}
@@ -119,9 +113,22 @@ func (g *GitcliCloner) CloneRepo(ctx
plugin.SubTaskContext, localDir string) err
return nil
}
-func (g *GitcliCloner) buildCloneCommand(ctx plugin.SubTaskContext, localDir
string, since *time.Time) (*exec.Cmd, errors.Error) {
+func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir
string, since *time.Time) errors.Error {
taskData := ctx.GetData().(*GitExtractorTaskData)
args := []string{"clone", taskData.Options.Url, localDir, "--bare",
"--progress"}
+ if since != nil {
+ args = append(args, fmt.Sprintf("--shallow-since=%s",
since.Format(time.RFC3339)))
+ }
+ // support time after and diff sync
+ // support skipping blobs collection
+ if *taskData.Options.SkipCommitStat {
+ args = append(args, "--filter=blob:none")
+ }
+ return g.execGitCommand(ctx, args...)
+}
+
+func (g *GitcliCloner) execGitCommand(ctx plugin.SubTaskContext, args
...string) errors.Error {
+ taskData := ctx.GetData().(*GitExtractorTaskData)
env := []string{}
// support proxy
if taskData.ParsedURL.Scheme == "http" || taskData.ParsedURL.Scheme ==
"https" {
@@ -136,7 +143,7 @@ func (g *GitcliCloner) buildCloneCommand(ctx
plugin.SubTaskContext, localDir str
if taskData.Options.Proxy != "" {
parsedProxyURL, e := url.Parse(taskData.Options.Proxy)
if e != nil {
- return nil, errors.BadInput.Wrap(e, "failed to
parse the proxy URL")
+ return errors.BadInput.Wrap(e, "failed to parse
the proxy URL")
}
proxyCommand := "corkscrew"
sshCmdArgs = append(sshCmdArgs, "-o",
fmt.Sprintf(`ProxyCommand="%s %s %s %%h %%p"`, proxyCommand,
parsedProxyURL.Hostname(), parsedProxyURL.Port()))
@@ -146,16 +153,16 @@ func (g *GitcliCloner) buildCloneCommand(ctx
plugin.SubTaskContext, localDir str
pkFile, err := os.CreateTemp("", "gitext-pk")
if err != nil {
g.logger.Error(err, "create temp private key
file error")
- return nil, errors.Default.New("failed to
handle the private key")
+ return errors.Default.New("failed to handle the
private key")
}
if _, e :=
pkFile.WriteString(taskData.Options.PrivateKey + "\n"); e != nil {
g.logger.Error(err, "write private key file
error")
- return nil, errors.Default.New("failed to write
the private key")
+ return errors.Default.New("failed to write the
private key")
}
pkFile.Close()
if e := os.Chmod(pkFile.Name(), 0600); e != nil {
g.logger.Error(err, "chmod private key file
error")
- return nil, errors.Default.New("failed to
modify the private key")
+ return errors.Default.New("failed to modify the
private key")
}
if taskData.Options.Passphrase != "" {
@@ -169,7 +176,7 @@ func (g *GitcliCloner) buildCloneCommand(ctx
plugin.SubTaskContext, localDir str
if ppout, pperr := pp.CombinedOutput(); pperr
!= nil {
g.logger.Error(pperr, "change private
key passphrase error")
g.logger.Info(string(ppout))
- return nil, errors.Default.New("failed
to decrypt the private key")
+ return errors.Default.New("failed to
decrypt the private key")
}
}
defer os.Remove(pkFile.Name())
@@ -179,22 +186,13 @@ func (g *GitcliCloner) buildCloneCommand(ctx
plugin.SubTaskContext, localDir str
env = append(env, fmt.Sprintf("GIT_SSH_COMMAND=ssh %s",
strings.Join(sshCmdArgs, " ")))
}
}
- // support time after and diff sync
- if since != nil {
- args = append(args, fmt.Sprintf("--shallow-since=%s",
since.Format(time.RFC3339)))
- }
- // support skipping blobs collection
- if *taskData.Options.SkipCommitStat {
- args = append(args, "--filter=blob:none")
- }
- // fmt.Printf("args: %v\n", args)
g.logger.Debug("git %v", args)
cmd := exec.CommandContext(ctx.GetContext(), "git", args...)
cmd.Env = env
- return cmd, nil
+ return g.execCommand(cmd)
}
-func (g *GitcliCloner) execCloneCommand(cmd *exec.Cmd) errors.Error {
+func (g *GitcliCloner) execCommand(cmd *exec.Cmd) errors.Error {
stdout, err := cmd.StdoutPipe()
if err != nil {
g.logger.Error(err, "stdout pipe error")
diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go
b/backend/plugins/gitextractor/parser/repo_gogit.go
index d98ec3d95..baaa4e42b 100644
--- a/backend/plugins/gitextractor/parser/repo_gogit.go
+++ b/backend/plugins/gitextractor/parser/repo_gogit.go
@@ -301,6 +301,20 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx
plugin.SubTaskContext) (e
default:
}
commitSha := commit.Hash.String()
+
+ if commit.NumParents() != 0 {
+ _, err := commit.Parents().Next()
+ if err != nil {
+ if err == plumbing.ErrObjectNotFound {
+ // Skip calculating commit statistics
when there are parent commits, but the first one cannot be fetched from the ODB.
+ // This usually happens during a
shallow clone for incremental collection. Otherwise, we might end up overwriting
+ // the correct addition/deletion data
in the database with an absurdly large addition number.
+ r.logger.Info("skip commit %s because
it has no parent commit", commitSha)
+ return nil
+ }
+ return err
+ }
+ }
codeCommit := &code.Commit{
Sha: commitSha,
Message: commit.Message,
diff --git a/backend/plugins/gitextractor/parser/repo_libgit2.go
b/backend/plugins/gitextractor/parser/repo_libgit2.go
index f4371204f..78451feb0 100644
--- a/backend/plugins/gitextractor/parser/repo_libgit2.go
+++ b/backend/plugins/gitextractor/parser/repo_libgit2.go
@@ -279,6 +279,17 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx
plugin.SubTaskContext)
if commit == nil {
return nil
}
+ var parent *git.Commit
+ if commit.ParentCount() > 0 {
+ parent = commit.Parent(0)
+ // Skip calculating commit statistics when there are
parent commits, but the first one cannot be fetched from the ODB.
+ // This usually happens during a shallow clone for
incremental collection. Otherwise, we might end up overwriting
+ // the correct addition/deletion data in the database
with an absurdly large addition number.
+ if parent == nil {
+ r.logger.Info("skip commit %s because it has no
parent commit", commit.Id().String())
+ return nil
+ }
+ }
commitSha := commit.Id().String()
r.logger.Debug("process commit: %s", commitSha)
c := &code.Commit{
@@ -303,10 +314,6 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx
plugin.SubTaskContext)
if err != nil {
return err
}
- var parent *git.Commit
- if commit.ParentCount() > 0 {
- parent = commit.Parent(0)
- }
if !*taskOpts.SkipCommitStat {
var stats *git.DiffStats