This is an automated email from the ASF dual-hosted git repository.

abeizn pushed a commit to branch release-v1.0
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/release-v1.0 by this push:
     new 692a2abb8 fix: addition of some commits are ridiculous huge (#7719) (#7720)
692a2abb8 is described below

commit 692a2abb855b390a5a64fea4e1ed0d14f4f13205
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Jul 11 16:05:06 2024 +0800

    fix: addition of some commits are ridiculous huge (#7719) (#7720)
    
    Co-authored-by: Klesh Wong <[email protected]>
---
 .../plugins/gitextractor/parser/clone_gitcli.go    | 50 +++++++++++-----------
 backend/plugins/gitextractor/parser/repo_gogit.go  | 14 ++++++
 .../plugins/gitextractor/parser/repo_libgit2.go    | 15 +++++--
 3 files changed, 49 insertions(+), 30 deletions(-)

diff --git a/backend/plugins/gitextractor/parser/clone_gitcli.go b/backend/plugins/gitextractor/parser/clone_gitcli.go
index 9f2ca7ad5..89a004239 100644
--- a/backend/plugins/gitextractor/parser/clone_gitcli.go
+++ b/backend/plugins/gitextractor/parser/clone_gitcli.go
@@ -89,11 +89,7 @@ func (g *GitcliCloner) CloneRepo(ctx plugin.SubTaskContext, 
localDir string) err
 
        }
 
-       cmd, err := g.buildCloneCommand(ctx, localDir, since)
-       if err != nil {
-               return err
-       }
-       err = g.execCloneCommand(cmd)
+       err := g.execGitCloneCommand(ctx, localDir, since)
        if err != nil {
                return err
        }
@@ -101,13 +97,11 @@ func (g *GitcliCloner) CloneRepo(ctx 
plugin.SubTaskContext, localDir string) err
        if since != nil {
                // fixes error described on 
https://stackoverflow.com/questions/63878612/git-fatal-error-in-object-unshallow-sha-1
                // It might be casued by the commit which being deepen has 
mulitple parent(e.g. a merge commit), not sure.
-               repackCmd := exec.CommandContext(ctx.GetContext(), "git", "-C", 
localDir, "repack", "-d")
-               if err := repackCmd.Run(); err != nil {
+               if err := g.execGitCommand(ctx, "-C", localDir, "repack", 
"-d"); err != nil {
                        return errors.Default.Wrap(err, "failed to repack the 
repo")
                }
-               deepenCmd := exec.CommandContext(ctx.GetContext(), "git", "-C", 
localDir, "fetch", "--deepen=1")
                // deepen would fail on a EMPTY repo, ignore the error
-               if err := deepenCmd.Run(); err != nil {
+               if err := g.execGitCommand(ctx, "-C", localDir, "fetch", 
"--deepen=1"); err != nil {
                        g.logger.Error(err, "failed to deepen the cloned repo")
                }
        }
@@ -119,9 +113,22 @@ func (g *GitcliCloner) CloneRepo(ctx 
plugin.SubTaskContext, localDir string) err
        return nil
 }
 
-func (g *GitcliCloner) buildCloneCommand(ctx plugin.SubTaskContext, localDir 
string, since *time.Time) (*exec.Cmd, errors.Error) {
+func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir 
string, since *time.Time) errors.Error {
        taskData := ctx.GetData().(*GitExtractorTaskData)
        args := []string{"clone", taskData.Options.Url, localDir, "--bare", 
"--progress"}
+       if since != nil {
+               args = append(args, fmt.Sprintf("--shallow-since=%s", 
since.Format(time.RFC3339)))
+       }
+       // support time after and diff sync
+       // support skipping blobs collection
+       if *taskData.Options.SkipCommitStat {
+               args = append(args, "--filter=blob:none")
+       }
+       return g.execGitCommand(ctx, args...)
+}
+
+func (g *GitcliCloner) execGitCommand(ctx plugin.SubTaskContext, args 
...string) errors.Error {
+       taskData := ctx.GetData().(*GitExtractorTaskData)
        env := []string{}
        // support proxy
        if taskData.ParsedURL.Scheme == "http" || taskData.ParsedURL.Scheme == 
"https" {
@@ -136,7 +143,7 @@ func (g *GitcliCloner) buildCloneCommand(ctx 
plugin.SubTaskContext, localDir str
                if taskData.Options.Proxy != "" {
                        parsedProxyURL, e := url.Parse(taskData.Options.Proxy)
                        if e != nil {
-                               return nil, errors.BadInput.Wrap(e, "failed to 
parse the proxy URL")
+                               return errors.BadInput.Wrap(e, "failed to parse 
the proxy URL")
                        }
                        proxyCommand := "corkscrew"
                        sshCmdArgs = append(sshCmdArgs, "-o", 
fmt.Sprintf(`ProxyCommand="%s %s %s %%h %%p"`, proxyCommand, 
parsedProxyURL.Hostname(), parsedProxyURL.Port()))
@@ -146,16 +153,16 @@ func (g *GitcliCloner) buildCloneCommand(ctx 
plugin.SubTaskContext, localDir str
                        pkFile, err := os.CreateTemp("", "gitext-pk")
                        if err != nil {
                                g.logger.Error(err, "create temp private key 
file error")
-                               return nil, errors.Default.New("failed to 
handle the private key")
+                               return errors.Default.New("failed to handle the 
private key")
                        }
                        if _, e := 
pkFile.WriteString(taskData.Options.PrivateKey + "\n"); e != nil {
                                g.logger.Error(err, "write private key file 
error")
-                               return nil, errors.Default.New("failed to write 
the  private key")
+                               return errors.Default.New("failed to write the  
private key")
                        }
                        pkFile.Close()
                        if e := os.Chmod(pkFile.Name(), 0600); e != nil {
                                g.logger.Error(err, "chmod private key file 
error")
-                               return nil, errors.Default.New("failed to 
modify the private key")
+                               return errors.Default.New("failed to modify the 
private key")
                        }
 
                        if taskData.Options.Passphrase != "" {
@@ -169,7 +176,7 @@ func (g *GitcliCloner) buildCloneCommand(ctx 
plugin.SubTaskContext, localDir str
                                if ppout, pperr := pp.CombinedOutput(); pperr 
!= nil {
                                        g.logger.Error(pperr, "change private 
key passphrase error")
                                        g.logger.Info(string(ppout))
-                                       return nil, errors.Default.New("failed 
to decrypt the private key")
+                                       return errors.Default.New("failed to 
decrypt the private key")
                                }
                        }
                        defer os.Remove(pkFile.Name())
@@ -179,22 +186,13 @@ func (g *GitcliCloner) buildCloneCommand(ctx 
plugin.SubTaskContext, localDir str
                        env = append(env, fmt.Sprintf("GIT_SSH_COMMAND=ssh %s", 
strings.Join(sshCmdArgs, " ")))
                }
        }
-       // support time after and diff sync
-       if since != nil {
-               args = append(args, fmt.Sprintf("--shallow-since=%s", 
since.Format(time.RFC3339)))
-       }
-       // support skipping blobs collection
-       if *taskData.Options.SkipCommitStat {
-               args = append(args, "--filter=blob:none")
-       }
-       // fmt.Printf("args: %v\n", args)
        g.logger.Debug("git %v", args)
        cmd := exec.CommandContext(ctx.GetContext(), "git", args...)
        cmd.Env = env
-       return cmd, nil
+       return g.execCommand(cmd)
 }
 
-func (g *GitcliCloner) execCloneCommand(cmd *exec.Cmd) errors.Error {
+func (g *GitcliCloner) execCommand(cmd *exec.Cmd) errors.Error {
        stdout, err := cmd.StdoutPipe()
        if err != nil {
                g.logger.Error(err, "stdout pipe error")
diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go b/backend/plugins/gitextractor/parser/repo_gogit.go
index d98ec3d95..baaa4e42b 100644
--- a/backend/plugins/gitextractor/parser/repo_gogit.go
+++ b/backend/plugins/gitextractor/parser/repo_gogit.go
@@ -301,6 +301,20 @@ func (r *GogitRepoCollector) CollectCommits(subtaskCtx 
plugin.SubTaskContext) (e
                default:
                }
                commitSha := commit.Hash.String()
+
+               if commit.NumParents() != 0 {
+                       _, err := commit.Parents().Next()
+                       if err != nil {
+                               if err == plumbing.ErrObjectNotFound {
+                                       // Skip calculating commit statistics 
when there are parent commits, but the first one cannot be fetched from the ODB.
+                                       // This usually happens during a 
shallow clone for incremental collection. Otherwise, we might end up overwriting
+                                       // the correct addition/deletion data 
in the database with an absurdly large addition number.
+                                       r.logger.Info("skip commit %s because 
it has no parent commit", commitSha)
+                                       return nil
+                               }
+                               return err
+                       }
+               }
                codeCommit := &code.Commit{
                        Sha:            commitSha,
                        Message:        commit.Message,
diff --git a/backend/plugins/gitextractor/parser/repo_libgit2.go b/backend/plugins/gitextractor/parser/repo_libgit2.go
index f4371204f..78451feb0 100644
--- a/backend/plugins/gitextractor/parser/repo_libgit2.go
+++ b/backend/plugins/gitextractor/parser/repo_libgit2.go
@@ -279,6 +279,17 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx 
plugin.SubTaskContext)
                if commit == nil {
                        return nil
                }
+               var parent *git.Commit
+               if commit.ParentCount() > 0 {
+                       parent = commit.Parent(0)
+                       // Skip calculating commit statistics when there are 
parent commits, but the first one cannot be fetched from the ODB.
+                       // This usually happens during a shallow clone for 
incremental collection. Otherwise, we might end up overwriting
+                       // the correct addition/deletion data in the database 
with an absurdly large addition number.
+                       if parent == nil {
+                               r.logger.Info("skip commit %s because it has no 
parent commit", commit.Id().String())
+                               return nil
+                       }
+               }
                commitSha := commit.Id().String()
                r.logger.Debug("process commit: %s", commitSha)
                c := &code.Commit{
@@ -303,10 +314,6 @@ func (r *Libgit2RepoCollector) CollectCommits(subtaskCtx 
plugin.SubTaskContext)
                if err != nil {
                        return err
                }
-               var parent *git.Commit
-               if commit.ParentCount() > 0 {
-                       parent = commit.Parent(0)
-               }
 
                if !*taskOpts.SkipCommitStat {
                        var stats *git.DiffStats

Reply via email to