This is an automated email from the ASF dual-hosted git repository.

lynwee pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 14d5dd1cf feat #6615, replace libgit2 with go-git. (#6701)
14d5dd1cf is described below

commit 14d5dd1cfc6a1c9398018622a3b3adc317ec11c7
Author: Lynwee <[email protected]>
AuthorDate: Tue Mar 5 11:42:36 2024 +0800

    feat #6615, replace libgit2 with go-git. (#6701)
    
    * feat(gitextractor): add "CountTags/CountBranches/CountCommits"WithGoGit
    
    * feat(gitextractor): stash codes
    
    * fix(gitextractor): refactor code to make it easier to handle
    
    * refactor(gitextractor): add an interface for replacing libgit2 with go-git
    
    * feat(gitextractor): stash current works
    
    * feat(gitextractor): support collect repo data with go-git
    
    * fix(mod): remove replacement
    
    * fix(gitextractor): fix lint errors
    
    * fix(gitextractor): works with lower golang version
    
    * fix(gitextractor): fix lint errors
    
    * fix(gitextractor): fix test errors
    
    * fix(unittest): try to fix
    
    * fix(gitextractor): fix test
    
    * fix(gitextractor): fix test
    
    * fix(gitextractor): update the config value of 'USE_GO_GIT_IN_GIT_EXTRACTOR'
    
    * fix(gitextractor): remove hardcoding path
---
 .gitignore                                         |   5 +-
 backend/core/models/domainlayer/domainlayer.go     |   7 +
 backend/go.mod                                     |  14 +-
 backend/go.sum                                     |  15 +
 backend/plugins/gitextractor/impl/impl.go          |   2 +-
 backend/plugins/gitextractor/main.go               |   2 +-
 backend/plugins/gitextractor/parser/clone.go       |  59 ++-
 .../gitextractor/parser/clone_gogit.go}            |  22 +-
 .../gitextractor/parser/clone_libgit2.go}          |  22 +-
 backend/plugins/gitextractor/parser/clone_test.go  |  75 +--
 backend/plugins/gitextractor/parser/repo.go        | 578 +--------------------
 .../plugins/gitextractor/parser/repo_creator.go    |  24 +-
 backend/plugins/gitextractor/parser/repo_gogit.go  | 546 +++++++++++++++++++
 .../parser/{repo.go => repo_libgit2.go}            |  48 +-
 backend/plugins/gitextractor/parser/repo_test.go   | 302 +++++++++++
 backend/plugins/gitextractor/store/csv.go          |   6 +
 backend/plugins/gitextractor/tasks/clone.go        |  44 +-
 .../gitextractor/tasks/git_repo_collector.go       |  25 +-
 env.example                                        |   4 +
 19 files changed, 1098 insertions(+), 702 deletions(-)

diff --git a/.gitignore b/.gitignore
index a9d2bbe8a..0aa085362 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,4 +159,7 @@ backend/server/api/docs/docs.go
 # python
 *.pyc
 __pycache__
-venv
\ No newline at end of file
+venv
+
+/backend/plugins/gitextractor/parser/output*
+/backend/plugins/gitextractor/parser/demo_repo/
\ No newline at end of file
diff --git a/backend/core/models/domainlayer/domainlayer.go 
b/backend/core/models/domainlayer/domainlayer.go
index 2e899173d..f11570d40 100644
--- a/backend/core/models/domainlayer/domainlayer.go
+++ b/backend/core/models/domainlayer/domainlayer.go
@@ -37,3 +37,10 @@ func NewDomainEntity(id string) DomainEntity {
                NoPKModel: common.NewNoPKModel(),
        }
 }
+
+func NewDomainEntityExtended(id string) DomainEntityExtended {
+       return DomainEntityExtended{
+               Id:        id,
+               NoPKModel: common.NewNoPKModel(),
+       }
+}
diff --git a/backend/go.mod b/backend/go.mod
index 8d0f833b8..ab9e5f250 100644
--- a/backend/go.mod
+++ b/backend/go.mod
@@ -7,7 +7,7 @@ require (
        github.com/gin-contrib/cors v1.3.1
        github.com/gin-gonic/gin v1.9.1
        github.com/go-errors/errors v1.4.2 // indirect
-       github.com/go-git/go-git/v5 v5.9.0
+       github.com/go-git/go-git/v5 v5.11.0
        github.com/go-playground/validator/v10 v10.14.1
        github.com/gocarina/gocsv v0.0.0-20220707092902-b9da1f06c77e
        github.com/google/uuid v1.3.0
@@ -24,13 +24,13 @@ require (
        github.com/spf13/cast v1.5.1
        github.com/spf13/cobra v1.5.0
        github.com/spf13/viper v1.8.1
-       github.com/stretchr/testify v1.8.3
+       github.com/stretchr/testify v1.8.4
        github.com/swaggo/files v1.0.1
        github.com/swaggo/gin-swagger v1.6.0
        github.com/swaggo/swag v1.16.1
        github.com/tidwall/gjson v1.14.3
        github.com/viant/afs v1.16.0
-       golang.org/x/crypto v0.14.0
+       golang.org/x/crypto v0.16.0
        golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f
        golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602
        golang.org/x/sync v0.3.0
@@ -106,9 +106,9 @@ require (
        github.com/ugorji/go/codec v1.2.11 // indirect
        github.com/xanzy/ssh-agent v0.3.3 // indirect
        golang.org/x/arch v0.3.0 // indirect
-       golang.org/x/net v0.16.0 // indirect
-       golang.org/x/sys v0.13.0 // indirect
-       golang.org/x/text v0.13.0 // indirect
+       golang.org/x/net v0.19.0 // indirect
+       golang.org/x/sys v0.15.0 // indirect
+       golang.org/x/text v0.14.0 // indirect
        golang.org/x/tools v0.13.0 // indirect
        google.golang.org/appengine v1.6.7 // indirect
        google.golang.org/protobuf v1.30.0 // indirect
@@ -123,5 +123,3 @@ require (
        github.com/golang-jwt/jwt/v5 v5.0.0-rc.1
        golang.org/x/mod v0.13.0
 )
-
-//replace github.com/apache/incubator-devlake => ./
diff --git a/backend/go.sum b/backend/go.sum
index f8e34f61a..7b42dbbf9 100644
--- a/backend/go.sum
+++ b/backend/go.sum
@@ -171,8 +171,11 @@ github.com/go-git/gcfg 
v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmS
 github.com/go-git/go-billy/v5 v5.5.0 
h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU=
 github.com/go-git/go-billy/v5 v5.5.0/go.mod 
h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow=
 github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20230305113008-0c11038e723f 
h1:Pz0DHeFij3XFhoBRGUDPzSJ+w2UcK5/0JvF8DRI58r8=
+github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 
h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4=
 github.com/go-git/go-git/v5 v5.9.0 
h1:cD9SFA7sHVRdJ7AYck1ZaAa/yeuBvGPxwXDL8cxrObY=
 github.com/go-git/go-git/v5 v5.9.0/go.mod 
h1:RKIqga24sWdMGZF+1Ekv9kylsDz6LzdTSI2s/OsZWE0=
+github.com/go-git/go-git/v5 v5.11.0 
h1:XIZc1p+8YzypNr34itUfSvYJcv+eYdTnTvOZ2vD3cA4=
+github.com/go-git/go-git/v5 v5.11.0/go.mod 
h1:6GFcX2P3NM7FPBfpePbpLd21XxsgdAt+lKqXmCUiUCY=
 github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod 
h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod 
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod 
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
@@ -272,6 +275,7 @@ github.com/google/go-cmp v0.5.3/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
 github.com/google/go-cmp v0.5.4/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-querystring v1.0.0/go.mod 
h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
 github.com/google/gofuzz v1.0.0/go.mod 
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/martian v2.1.0+incompatible/go.mod 
h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
@@ -621,6 +625,8 @@ github.com/stretchr/testify v1.8.1/go.mod 
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
 github.com/stretchr/testify v1.8.2/go.mod 
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 github.com/stretchr/testify v1.8.3 
h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
 github.com/stretchr/testify v1.8.3/go.mod 
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.8.4 
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod 
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
 github.com/subosito/gotenv v1.2.0 
h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
 github.com/subosito/gotenv v1.2.0/go.mod 
h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
 github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
@@ -714,6 +720,8 @@ golang.org/x/crypto 
v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2Uz
 golang.org/x/crypto v0.7.0/go.mod 
h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU=
 golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
 golang.org/x/crypto v0.14.0/go.mod 
h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY=
+golang.org/x/crypto v0.16.0/go.mod 
h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod 
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod 
h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -806,6 +814,8 @@ golang.org/x/net v0.7.0/go.mod 
h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
 golang.org/x/net v0.16.0 h1:7eBu7KsSvFDtSXUIDbh3aqlK4DPsZ1rByC8PFfBThos=
 golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
+golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod 
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod 
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod 
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -906,6 +916,8 @@ golang.org/x/sys v0.5.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
 golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
+golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod 
h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod 
h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -913,6 +925,7 @@ golang.org/x/term v0.2.0/go.mod 
h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
 golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
+golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -927,6 +940,8 @@ golang.org/x/text v0.7.0/go.mod 
h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod 
h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod 
h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
diff --git a/backend/plugins/gitextractor/impl/impl.go 
b/backend/plugins/gitextractor/impl/impl.go
index 31eae69e4..d8d5413d4 100644
--- a/backend/plugins/gitextractor/impl/impl.go
+++ b/backend/plugins/gitextractor/impl/impl.go
@@ -74,7 +74,7 @@ func (p GitExtractor) PrepareTaskData(taskCtx 
plugin.TaskContext, options map[st
 func (p GitExtractor) Close(taskCtx plugin.TaskContext) errors.Error {
        if taskData, ok := taskCtx.GetData().(*tasks.GitExtractorTaskData); ok {
                if taskData.GitRepo != nil {
-                       if err := taskData.GitRepo.Close(); err != nil {
+                       if err := taskData.GitRepo.Close(taskCtx.GetContext()); 
err != nil {
                                return errors.Convert(err)
                        }
                }
diff --git a/backend/plugins/gitextractor/main.go 
b/backend/plugins/gitextractor/main.go
index a62f1561d..f9cc30490 100644
--- a/backend/plugins/gitextractor/main.go
+++ b/backend/plugins/gitextractor/main.go
@@ -88,7 +88,7 @@ func main() {
        if err != nil {
                panic(err)
        }
-       defer repo.Close()
+       defer repo.Close(ctx)
        if err = repo.CollectAll(subTaskCtx); err != nil {
                panic(err)
        }
diff --git a/backend/plugins/gitextractor/parser/clone.go 
b/backend/plugins/gitextractor/parser/clone.go
index 085e7e3f3..f8739e731 100644
--- a/backend/plugins/gitextractor/parser/clone.go
+++ b/backend/plugins/gitextractor/parser/clone.go
@@ -27,7 +27,6 @@ import (
        "github.com/apache/incubator-devlake/core/plugin"
        "github.com/go-git/go-git/v5/plumbing/protocol/packp/capability"
        "github.com/go-git/go-git/v5/plumbing/transport"
-
        "net"
        "net/http"
        neturl "net/url"
@@ -75,8 +74,8 @@ func cloneOverSSH(ctx plugin.SubTaskContext, url, dir, 
passphrase string, pk []b
        return nil
 }
 
-func (l *GitRepoCreator) CloneOverHTTP(ctx plugin.SubTaskContext, repoId, url, 
user, password, proxy string) (*GitRepo, errors.Error) {
-       return withTempDirectory(func(dir string) (*GitRepo, error) {
+func (l *GitRepoCreator) cloneOverHTTP(ctx plugin.SubTaskContext, withGoGit 
bool, repoId, url, user, password, proxy string) (RepoCollector, errors.Error) {
+       return withTempDirectory(func(dir string) (RepoCollector, error) {
                var data []byte
                buf := bytes.NewBuffer(data)
                done := make(chan struct{}, 1)
@@ -127,12 +126,15 @@ func (l *GitRepoCreator) CloneOverHTTP(ctx 
plugin.SubTaskContext, repoId, url, u
                        l.logger.Error(err, "PlainCloneContext")
                        return nil, err
                }
+               if withGoGit {
+                       return l.LocalGoGitRepo(dir, repoId)
+               }
                return l.LocalRepo(dir, repoId)
        })
 }
 
-func (l *GitRepoCreator) CloneOverSSH(ctx plugin.SubTaskContext, repoId, url, 
privateKey, passphrase string) (*GitRepo, errors.Error) {
-       return withTempDirectory(func(dir string) (*GitRepo, error) {
+func (l *GitRepoCreator) cloneOverSSH(ctx plugin.SubTaskContext, withGoGit 
bool, repoId, url, privateKey, passphrase string) (RepoCollector, errors.Error) 
{
+       return withTempDirectory(func(dir string) (RepoCollector, error) {
                pk, err := base64.StdEncoding.DecodeString(privateKey)
                if err != nil {
                        return nil, err
@@ -141,31 +143,13 @@ func (l *GitRepoCreator) CloneOverSSH(ctx 
plugin.SubTaskContext, repoId, url, pr
                if err != nil {
                        return nil, err
                }
+               if withGoGit {
+                       return l.LocalGoGitRepo(dir, repoId)
+               }
                return l.LocalRepo(dir, repoId)
        })
 }
 
-func withTempDirectory(f func(tempDir string) (*GitRepo, error)) (*GitRepo, 
errors.Error) {
-       dir, err := os.MkdirTemp("", "gitextractor")
-       if err != nil {
-               return nil, errors.Convert(err)
-       }
-       cleanup := func() {
-               _ = os.RemoveAll(dir)
-       }
-       defer func() {
-               if err != nil {
-                       cleanup()
-               }
-       }()
-       repo, err := f(dir)
-       if err != nil {
-               return nil, errors.Convert(err)
-       }
-       repo.cleanup = cleanup
-       return repo, errors.Convert(err)
-}
-
 func setCloneProgress(subTaskCtx plugin.SubTaskContext, cloneProgressInfo 
string) {
        if cloneProgressInfo == "" {
                return
@@ -208,3 +192,26 @@ func refreshCloneProgress(subTaskCtx 
plugin.SubTaskContext, done chan struct{},
 func isAzureRepo(ctx context.Context, repoUrl string) bool {
        return strings.Contains(repoUrl, "dev.azure.com")
 }
+
+func withTempDirectory(f func(tempDir string) (RepoCollector, error)) 
(RepoCollector, errors.Error) {
+       dir, err := os.MkdirTemp("", "gitextractor")
+       if err != nil {
+               return nil, errors.Convert(err)
+       }
+       cleanup := func() {
+               _ = os.RemoveAll(dir)
+       }
+       defer func() {
+               if err != nil {
+                       cleanup()
+               }
+       }()
+       repo, err := f(dir)
+       if err != nil {
+               return nil, errors.Convert(err)
+       }
+       if err := repo.SetCleanUp(cleanup); err != nil {
+               return nil, errors.Convert(err)
+       }
+       return repo, nil
+}
diff --git a/backend/core/models/domainlayer/domainlayer.go 
b/backend/plugins/gitextractor/parser/clone_gogit.go
similarity index 54%
copy from backend/core/models/domainlayer/domainlayer.go
copy to backend/plugins/gitextractor/parser/clone_gogit.go
index 2e899173d..93b392d03 100644
--- a/backend/core/models/domainlayer/domainlayer.go
+++ b/backend/plugins/gitextractor/parser/clone_gogit.go
@@ -15,25 +15,17 @@ See the License for the specific language governing 
permissions and
 limitations under the License.
 */
 
-package domainlayer
+package parser
 
 import (
-       "github.com/apache/incubator-devlake/core/models/common"
+       "github.com/apache/incubator-devlake/core/errors"
+       "github.com/apache/incubator-devlake/core/plugin"
 )
 
-type DomainEntity struct {
-       Id string `json:"id" gorm:"primaryKey;type:varchar(255);comment:This 
key is generated based on details from the original plugin"` // format: 
<Plugin>:<Entity>:<PK0>:<PK1>
-       common.NoPKModel
+func (l *GitRepoCreator) CloneGoGitRepoOverHTTP(ctx plugin.SubTaskContext, 
repoId, url, user, password, proxy string) (RepoCollector, errors.Error) {
+       return l.cloneOverHTTP(ctx, true, repoId, url, user, password, proxy)
 }
 
-type DomainEntityExtended struct {
-       Id string `json:"id" gorm:"primaryKey;type:varchar(500);comment:This 
key is generated based on details from the original plugin"` // format: 
<Plugin>:<Entity>:<PK0>:<PK1>
-       common.NoPKModel
-}
-
-func NewDomainEntity(id string) DomainEntity {
-       return DomainEntity{
-               Id:        id,
-               NoPKModel: common.NewNoPKModel(),
-       }
+func (l *GitRepoCreator) CloneGoGitRepoOverSSH(ctx plugin.SubTaskContext, 
repoId, url, privateKey, passphrase string) (RepoCollector, errors.Error) {
+       return l.cloneOverSSH(ctx, true, repoId, url, privateKey, passphrase)
 }
diff --git a/backend/core/models/domainlayer/domainlayer.go 
b/backend/plugins/gitextractor/parser/clone_libgit2.go
similarity index 54%
copy from backend/core/models/domainlayer/domainlayer.go
copy to backend/plugins/gitextractor/parser/clone_libgit2.go
index 2e899173d..20eafd282 100644
--- a/backend/core/models/domainlayer/domainlayer.go
+++ b/backend/plugins/gitextractor/parser/clone_libgit2.go
@@ -15,25 +15,17 @@ See the License for the specific language governing 
permissions and
 limitations under the License.
 */
 
-package domainlayer
+package parser
 
 import (
-       "github.com/apache/incubator-devlake/core/models/common"
+       "github.com/apache/incubator-devlake/core/errors"
+       "github.com/apache/incubator-devlake/core/plugin"
 )
 
-type DomainEntity struct {
-       Id string `json:"id" gorm:"primaryKey;type:varchar(255);comment:This 
key is generated based on details from the original plugin"` // format: 
<Plugin>:<Entity>:<PK0>:<PK1>
-       common.NoPKModel
+func (l *GitRepoCreator) CloneOverHTTP(ctx plugin.SubTaskContext, repoId, url, 
user, password, proxy string) (RepoCollector, errors.Error) {
+       return l.cloneOverHTTP(ctx, false, repoId, url, user, password, proxy)
 }
 
-type DomainEntityExtended struct {
-       Id string `json:"id" gorm:"primaryKey;type:varchar(500);comment:This 
key is generated based on details from the original plugin"` // format: 
<Plugin>:<Entity>:<PK0>:<PK1>
-       common.NoPKModel
-}
-
-func NewDomainEntity(id string) DomainEntity {
-       return DomainEntity{
-               Id:        id,
-               NoPKModel: common.NewNoPKModel(),
-       }
+func (l *GitRepoCreator) CloneOverSSH(ctx plugin.SubTaskContext, repoId, url, 
privateKey, passphrase string) (RepoCollector, errors.Error) {
+       return l.cloneOverSSH(ctx, false, repoId, url, privateKey, passphrase)
 }
diff --git a/backend/plugins/gitextractor/parser/clone_test.go 
b/backend/plugins/gitextractor/parser/clone_test.go
index 562998f39..3e63e2af6 100644
--- a/backend/plugins/gitextractor/parser/clone_test.go
+++ b/backend/plugins/gitextractor/parser/clone_test.go
@@ -19,12 +19,14 @@ package parser
 
 import (
        gocontext "context"
-       "fmt"
        "github.com/apache/incubator-devlake/core/config"
        "github.com/apache/incubator-devlake/core/context"
        "github.com/apache/incubator-devlake/core/dal"
        "github.com/apache/incubator-devlake/core/log"
        "github.com/apache/incubator-devlake/core/plugin"
+       "github.com/apache/incubator-devlake/impls/dalgorm"
+       "gorm.io/driver/mysql"
+       "gorm.io/gorm"
        "testing"
 )
 
@@ -40,7 +42,7 @@ func Test_setCloneProgress(t *testing.T) {
                {
                        name: "test-0",
                        args: args{
-                               subTaskCtx: testSubTaskContext{},
+                               subTaskCtx: &testSubTaskContext{},
                                cloneProgressInfo: `
                                        Enumerating objects: 103, done.
                                        Counting objects: 100% (103/103), done.
@@ -51,7 +53,7 @@ func Test_setCloneProgress(t *testing.T) {
                {
                        name: "test-1",
                        args: args{
-                               subTaskCtx: testSubTaskContext{},
+                               subTaskCtx: &testSubTaskContext{},
                                cloneProgressInfo: `
                                        Enumerating objects: 103, done.
                                        Counting objects: 100% (103/103), done.
@@ -66,64 +68,73 @@ func Test_setCloneProgress(t *testing.T) {
        }
 }
 
-type testSubTaskContext struct{}
+type testSubTaskContext struct {
+       current int
+       total   int
+       Name    string
+}
 
-func (testSubTaskContext) GetConfigReader() config.ConfigReader {
-       //TODO implement me
-       panic("implement me")
+func (ctx *testSubTaskContext) GetConfigReader() config.ConfigReader {
+       cfg := config.GetConfig()
+       return cfg
 }
 
-func (testSubTaskContext) GetConfig(name string) string {
-       //TODO implement me
-       panic("implement me")
+func (ctx *testSubTaskContext) GetConfig(name string) string {
+       return config.GetConfig().GetString(name)
 }
 
-func (testSubTaskContext) GetLogger() log.Logger {
-       //TODO implement me
-       panic("implement me")
+func (ctx *testSubTaskContext) GetLogger() log.Logger {
+       return logger
 }
 
-func (testSubTaskContext) NestedLogger(name string) context.BasicRes {
+func (ctx *testSubTaskContext) NestedLogger(name string) context.BasicRes {
        //TODO implement me
        panic("implement me")
 }
 
-func (testSubTaskContext) ReplaceLogger(logger log.Logger) context.BasicRes {
+func (ctx *testSubTaskContext) ReplaceLogger(logger log.Logger) 
context.BasicRes {
        //TODO implement me
        panic("implement me")
 }
 
-func (testSubTaskContext) GetDal() dal.Dal {
-       //TODO implement me
-       panic("implement me")
+func (ctx *testSubTaskContext) GetDal() dal.Dal {
+       //dsn := 
"mysql://root:[email protected]:3306/lake?charset=utf8mb4&parseTime=True&loc=UTC"
+       if runInLocal {
+               dsn := 
"merico:merico@tcp(127.0.0.1:3306)/lake?charset=utf8mb4&parseTime=True&loc=Local"
+               db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
+               if err != nil {
+                       panic(err)
+               }
+               return dalgorm.NewDalgorm(db)
+       } else {
+               panic("implement me")
+       }
 }
 
-func (testSubTaskContext) GetName() string {
-       //TODO implement me
-       panic("implement me")
+func (ctx *testSubTaskContext) GetName() string {
+       return ctx.Name
 }
 
-func (testSubTaskContext) GetContext() gocontext.Context {
-       //TODO implement me
-       panic("implement me")
+func (ctx *testSubTaskContext) GetContext() gocontext.Context {
+       return gocontext.Background()
 }
 
-func (testSubTaskContext) GetData() interface{} {
+func (ctx *testSubTaskContext) GetData() interface{} {
        //TODO implement me
        panic("implement me")
 }
 
-func (testSubTaskContext) SetProgress(current int, total int) {
-       //TODO implement me
-       fmt.Printf("set current: %d, total: %d\n", current, total)
+func (ctx *testSubTaskContext) SetProgress(current int, total int) {
+       ctx.current = current
+       ctx.total = total
 }
 
-func (testSubTaskContext) IncProgress(quantity int) {
-       //TODO implement me
-       panic("implement me")
+func (ctx *testSubTaskContext) IncProgress(quantity int) {
+       ctx.current += quantity
+       ctx.total += quantity
 }
 
-func (testSubTaskContext) TaskContext() plugin.TaskContext {
+func (ctx *testSubTaskContext) TaskContext() plugin.TaskContext {
        //TODO implement me
        panic("implement me")
 }
diff --git a/backend/plugins/gitextractor/parser/repo.go 
b/backend/plugins/gitextractor/parser/repo.go
index 1a3a90d22..0077f2b52 100644
--- a/backend/plugins/gitextractor/parser/repo.go
+++ b/backend/plugins/gitextractor/parser/repo.go
@@ -19,577 +19,21 @@ package parser
 
 import (
        "context"
-       "crypto/sha256"
-       "encoding/hex"
-       "fmt"
-       "regexp"
-       "sort"
-       "strconv"
-
-       "github.com/apache/incubator-devlake/core/config"
-       "github.com/apache/incubator-devlake/core/dal"
-       "github.com/apache/incubator-devlake/core/errors"
-       "github.com/apache/incubator-devlake/core/log"
-       "github.com/apache/incubator-devlake/core/models/domainlayer"
-       "github.com/apache/incubator-devlake/core/models/domainlayer/code"
        "github.com/apache/incubator-devlake/core/plugin"
-       "github.com/apache/incubator-devlake/plugins/gitextractor/models"
-
-       git "github.com/libgit2/git2go/v33"
 )
 
-const SkipCommitFiles = "SKIP_COMMIT_FILES"
-
-var TypeNotMatchError = "the requested type does not match the type in the ODB"
-
-type GitRepo struct {
-       store   models.Store
-       logger  log.Logger
-       id      string
-       repo    *git.Repository
-       cleanup func()
-}
-
-// CollectAll The main parser subtask
-func (r *GitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) errors.Error {
-       subtaskCtx.SetProgress(0, -1)
-       err := r.CollectTags(subtaskCtx)
-       if err != nil {
-               return err
-       }
-       err = r.CollectBranches(subtaskCtx)
-       if err != nil {
-               return err
-       }
-       err = r.CollectCommits(subtaskCtx)
-       if err != nil {
-               return err
-       }
-       return r.CollectDiffLine(subtaskCtx)
-}
-
-// Close resources
-func (r *GitRepo) Close() errors.Error {
-       defer func() {
-               if r.cleanup != nil {
-                       r.cleanup()
-               }
-       }()
-       return r.store.Close()
-}
-
-// CountTags Count git tags subtask
-func (r *GitRepo) CountTags() (int, errors.Error) {
-       tags, err := r.repo.Tags.List()
-       if err != nil {
-               return 0, errors.Convert(err)
-       }
-       return len(tags), nil
-}
-
-// CountBranches count the number of branches in a git repo
-func (r *GitRepo) CountBranches(ctx context.Context) (int, errors.Error) {
-       var branchIter *git.BranchIterator
-       branchIter, err := r.repo.NewBranchIterator(git.BranchAll)
-       if err != nil {
-               return 0, errors.Convert(err)
-       }
-       count := 0
-       err = branchIter.ForEach(func(branch *git.Branch, branchType 
git.BranchType) error {
-               select {
-               case <-ctx.Done():
-                       return ctx.Err()
-               default:
-               }
-               if branch.IsBranch() || branch.IsRemote() {
-                       isHead, _ := branch.IsHead()
-                       if !isHead {
-                               count++
-                       }
-               }
-               return nil
-       })
-       return count, errors.Convert(err)
-}
-
-// CountCommits count the number of commits in a git repo
-func (r *GitRepo) CountCommits(ctx context.Context) (int, errors.Error) {
-       odb, err := r.repo.Odb()
-       if err != nil {
-               return 0, errors.Convert(err)
-       }
-       count := 0
-       err = odb.ForEach(func(id *git.Oid) error {
-               select {
-               case <-ctx.Done():
-                       return ctx.Err()
-               default:
-               }
-               commit, e := r.repo.LookupCommit(id)
-               if e != nil && e.Error() != TypeNotMatchError {
-                       return errors.Convert(e)
-               }
-               if commit != nil {
-                       count++
-               }
-               return nil
-       })
-       return count, errors.Convert(err)
-}
-
-// CollectTags Collect Tags data
-func (r *GitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) errors.Error {
-       return errors.Convert(r.repo.Tags.Foreach(func(name string, id 
*git.Oid) error {
-               select {
-               case <-subtaskCtx.GetContext().Done():
-                       return subtaskCtx.GetContext().Err()
-               default:
-               }
-               var err1 error
-               var tag *git.Tag
-               var tagCommit string
-               tag, err1 = r.repo.LookupTag(id)
-               if err1 != nil && err1.Error() != TypeNotMatchError {
-                       return errors.Convert(err1)
-               }
-               if tag != nil {
-                       tagCommit = tag.TargetId().String()
-               } else {
-                       tagCommit = id.String()
-               }
-               r.logger.Info("tagCommit:%s", tagCommit)
-               if tagCommit != "" {
-                       ref := &code.Ref{
-                               DomainEntityExtended: 
domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
-                               RepoId:               r.id,
-                               Name:                 name,
-                               CommitSha:            tagCommit,
-                               RefType:              TAG,
-                       }
-                       err1 = r.store.Refs(ref)
-                       if err1 != nil {
-                               return err1
-                       }
-                       subtaskCtx.IncProgress(1)
-               }
-               return nil
-       }))
-}
-
-// CollectBranches Collect branch data
-func (r *GitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext) 
errors.Error {
-       var repoInter *git.BranchIterator
-       repoInter, err := r.repo.NewBranchIterator(git.BranchAll)
-       if err != nil {
-               return errors.Convert(err)
-       }
-       return errors.Convert(repoInter.ForEach(func(branch *git.Branch, 
branchType git.BranchType) error {
-               select {
-               case <-subtaskCtx.GetContext().Done():
-                       return subtaskCtx.GetContext().Err()
-               default:
-               }
-               if branch.IsBranch() || branch.IsRemote() {
-                       name, err1 := branch.Name()
-                       if err1 != nil && err1.Error() != TypeNotMatchError {
-                               return err1
-                       }
-                       var sha string
-                       if oid := branch.Target(); oid != nil {
-                               sha = oid.String()
-                       }
-                       ref := &code.Ref{
-                               DomainEntityExtended: 
domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
-                               RepoId:               r.id,
-                               Name:                 name,
-                               CommitSha:            sha,
-                               RefType:              BRANCH,
-                       }
-                       ref.IsDefault, err1 = branch.IsHead()
-                       if err1 != nil && err1.Error() != TypeNotMatchError {
-                               return err1
-                       }
-                       err1 = r.store.Refs(ref)
-                       if err1 != nil && err1.Error() != TypeNotMatchError {
-                               return err1
-                       }
-                       subtaskCtx.IncProgress(1)
-                       return nil
-               }
-               return nil
-       }))
-}
-
-// CollectCommits Collect data from each commit, we can also get the diff line
-func (r *GitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) 
errors.Error {
-       opts, err := getDiffOpts()
-       if err != nil {
-               return err
-       }
-       db := subtaskCtx.GetDal()
-       components := make([]code.Component, 0)
-       err = db.All(&components, dal.From(components), dal.Where("repo_id= ?", 
r.id))
-       if err != nil {
-               return err
-       }
-       componentMap := make(map[string]*regexp.Regexp)
-       for _, component := range components {
-               componentMap[component.Name] = 
regexp.MustCompile(component.PathRegex)
-       }
-       odb, err := errors.Convert01(r.repo.Odb())
-       if err != nil {
-               return err
-       }
-       return errors.Convert(odb.ForEach(func(id *git.Oid) error {
-               select {
-               case <-subtaskCtx.GetContext().Done():
-                       return subtaskCtx.GetContext().Err()
-               default:
-               }
-               commit, err1 := r.repo.LookupCommit(id)
-               if err1 != nil && err1.Error() != TypeNotMatchError {
-                       return errors.Convert(err1)
-               }
-               if commit == nil {
-                       return nil
-               }
-               commitSha := commit.Id().String()
-               r.logger.Debug("process commit: %s", commitSha)
-               c := &code.Commit{
-                       Sha:     commitSha,
-                       Message: commit.Message(),
-               }
-               author := commit.Author()
-               if author != nil {
-                       c.AuthorName = author.Name
-                       c.AuthorEmail = author.Email
-                       c.AuthorId = author.Email
-                       c.AuthoredDate = author.When
-               }
-               committer := commit.Committer()
-               if committer != nil {
-                       c.CommitterName = committer.Name
-                       c.CommitterEmail = committer.Email
-                       c.CommitterId = committer.Email
-                       c.CommittedDate = committer.When
-               }
-               err = r.storeParentCommits(commitSha, commit)
-               if err != nil {
-                       return err
-               }
-               var parent *git.Commit
-               if commit.ParentCount() > 0 {
-                       parent = commit.Parent(0)
-               }
-               var stats *git.DiffStats
-               if stats, err = r.getDiffComparedToParent(c.Sha, commit, 
parent, opts, componentMap); err != nil {
-                       return err
-               }
-               c.Additions += stats.Insertions()
-               c.Deletions += stats.Deletions()
-               err = r.store.Commits(c)
-               if err != nil {
-                       return err
-               }
-               repoCommit := &code.RepoCommit{
-                       RepoId:    r.id,
-                       CommitSha: c.Sha,
-               }
-               err = r.store.RepoCommits(repoCommit)
-               if err != nil {
-                       return err
-               }
-               subtaskCtx.IncProgress(1)
-               return nil
-       }))
-}
-
-func (r *GitRepo) storeParentCommits(commitSha string, commit *git.Commit) 
errors.Error {
-       var commitParents []*code.CommitParent
-       for i := uint(0); i < commit.ParentCount(); i++ {
-               parent := commit.Parent(i)
-               if parent != nil {
-                       if parentId := parent.Id(); parentId != nil {
-                               commitParents = append(commitParents, 
&code.CommitParent{
-                                       CommitSha:       commitSha,
-                                       ParentCommitSha: parentId.String(),
-                               })
-                       }
-               }
-       }
-       return r.store.CommitParents(commitParents)
-}
-
-func (r *GitRepo) getDiffComparedToParent(commitSha string, commit 
*git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap 
map[string]*regexp.Regexp) (*git.DiffStats, errors.Error) {
-       var err error
-       var parentTree, tree *git.Tree
-       if parent != nil {
-               parentTree, err = parent.Tree()
-       }
-       if err != nil {
-               return nil, errors.Convert(err)
-       }
-       tree, err = commit.Tree()
-       if err != nil {
-               return nil, errors.Convert(err)
-       }
-       var diff *git.Diff
-       diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts)
-       if err != nil {
-               return nil, errors.Convert(err)
-       }
-       cfg := config.GetConfig()
-       skipCommitFiles := cfg.GetBool(SkipCommitFiles)
-       if !skipCommitFiles {
-               err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap)
-               if err != nil {
-                       return nil, errors.Convert(err)
-               }
-       }
-       var stats *git.DiffStats
-       stats, err = diff.Stats()
-       if err != nil {
-               return nil, errors.Convert(err)
-       }
-       return stats, nil
-}
-
-func (r *GitRepo) storeCommitFilesFromDiff(commitSha string, diff *git.Diff, 
componentMap map[string]*regexp.Regexp) errors.Error {
-       var commitFile *code.CommitFile
-       var commitFileComponent *code.CommitFileComponent
-       var err error
-       err = diff.ForEach(func(file git.DiffDelta, progress float64) (
-               git.DiffForEachHunkCallback, error) {
-               if commitFile != nil {
-                       err = r.store.CommitFiles(commitFile)
-                       if err != nil {
-                               r.logger.Error(err, "CommitFiles error")
-                               return nil, err
-                       }
-               }
+type RepoCollector interface {
+       SetCleanUp(func()) error
+       Close(ctx context.Context) error
 
-               commitFile = new(code.CommitFile)
-               commitFile.CommitSha = commitSha
-               commitFile.FilePath = file.NewFile.Path
+       CollectAll(subtaskCtx plugin.SubTaskContext) error
 
-               // With some long path,the varchar(255) was not enough both ID 
and file_path
-               // So we use the hash to compress the path in ID and add length 
of file_path.
-               // Use commitSha and the sha256 of FilePath to create id
-               shaFilePath := sha256.New()
-               shaFilePath.Write([]byte(file.NewFile.Path))
-               commitFile.Id = commitSha + ":" + 
hex.EncodeToString(shaFilePath.Sum(nil))
-
-               commitFileComponent = new(code.CommitFileComponent)
-               for component, reg := range componentMap {
-                       if reg.MatchString(commitFile.FilePath) {
-                               commitFileComponent.ComponentName = component
-                               break
-                       }
-               }
-               commitFileComponent.CommitFileId = commitFile.Id
-               if commitFileComponent.ComponentName == "" {
-                       commitFileComponent.ComponentName = "Default"
-               }
-               return func(hunk git.DiffHunk) (git.DiffForEachLineCallback, 
error) {
-                       return func(line git.DiffLine) error {
-                               if line.Origin == git.DiffLineAddition {
-                                       commitFile.Additions += line.NumLines
-                               }
-                               if line.Origin == git.DiffLineDeletion {
-                                       commitFile.Deletions += line.NumLines
-                               }
-                               return nil
-                       }, nil
-               }, nil
-       }, git.DiffDetailLines)
-       if commitFileComponent != nil {
-               err = r.store.CommitFileComponents(commitFileComponent)
-               if err != nil {
-                       r.logger.Error(err, "CommitFileComponents error")
-               }
-       }
-       if commitFile != nil {
-               err = r.store.CommitFiles(commitFile)
-               if err != nil {
-                       r.logger.Error(err, "CommitFiles error")
-               }
-       }
-       return errors.Convert(err)
-}
-
-// CollectDiffLine get line diff data from a specific branch
-func (r *GitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext) 
errors.Error {
-       //Using this subtask,we can get every line change in every commit.
-       //We maintain a snapshot structure to get which commit each deleted 
line belongs to
-       snapshot := make(map[string] /*file path*/ *models.FileBlame)
-       repo := r.repo
-       //step 1. get the reverse commit list
-       commitList := make([]git.Commit, 0)
-       //get currently head commitsha, dafault is master branch
-       // check branch, if not master, checkout to branch's head
-       commitOid, err1 := repo.Head()
-       if err1 != nil && err1.Error() != TypeNotMatchError {
-               return errors.Convert(err1)
-       }
-       //get head commit object and add into commitList
-       commit, err1 := repo.LookupCommit(commitOid.Target())
-       if err1 != nil && err1.Error() != TypeNotMatchError {
-               return errors.Convert(err1)
-       }
-       commitList = append(commitList, *commit)
-       // if current head has parents, get parent commitsha
-       for commit != nil && commit.ParentCount() > 0 {
-               pid := commit.ParentId(0)
-               commit, err1 = repo.LookupCommit(pid)
-               if err1 != nil && err1.Error() != TypeNotMatchError {
-                       return errors.Convert(err1)
-               }
-               commitList = append(commitList, *commit)
-       }
-       // reverse commitList
-       for i, j := 0, len(commitList)-1; i < j; i, j = i+1, j-1 {
-               commitList[i], commitList[j] = commitList[j], commitList[i]
-       }
-       //step 2. get the diff of each commit
-       // for each commit, get the diff
-       for _, commitsha := range commitList {
-               curcommit, err := repo.LookupCommit(commitsha.Id())
-               if err != nil {
-                       return errors.Convert(err)
-               }
-               if curcommit.ParentCount() == 0 || curcommit.ParentCount() > 0 {
-                       var parentTree, tree *git.Tree
-                       tree, err = curcommit.Tree()
-                       if err != nil {
-                               return errors.Convert(err)
-                       }
-                       var diff *git.Diff
-                       //FIXME error type convert
-                       opts, err := git.DefaultDiffOptions()
-                       opts.NotifyCallback = func(diffSoFar *git.Diff, delta 
git.DiffDelta, matchedPathSpec string) error {
-                               return nil
-                       }
-                       if err != nil {
-                               return errors.Convert(err)
-                       }
-                       if curcommit.ParentCount() > 0 {
-                               parent := curcommit.Parent(0)
-                               parentTree, err = parent.Tree()
-                       }
-                       diff, err = repo.DiffTreeToTree(parentTree, tree, &opts)
-                       if err != nil {
-                               return errors.Convert(err)
-                       }
-                       deleted := make(models.DiffLines, 0)
-                       added := make(models.DiffLines, 0)
-                       var lastFile string
-                       lastFile = ""
-                       err = diff.ForEach(func(file git.DiffDelta, progress 
float64) (git.DiffForEachHunkCallback, error) {
-                               //if doesn't exist in snapshot, create a new one
-                               if _, ok := snapshot[file.OldFile.Path]; !ok {
-                                       fileBlame, err := models.NewFileBlame()
-                                       if err != nil {
-                                               r.logger.Info("Create FileBlame 
Error")
-                                               return nil, err
-                                       }
-                                       snapshot[file.OldFile.Path] = 
(*models.FileBlame)(fileBlame)
-                               }
-                               if lastFile == "" {
-                                       lastFile = file.NewFile.Path
-                               } else if lastFile != file.NewFile.Path {
-                                       updateSnapshotFileBlame(curcommit, 
deleted, added, lastFile, snapshot)
-                                       //reset the deleted and added,last_file 
now is current file
-                                       deleted = make([]git.DiffLine, 0)
-                                       added = make([]git.DiffLine, 0)
-                                       lastFile = file.NewFile.Path
-                               }
-                               hunkNum := 0
-                               return func(hunk git.DiffHunk) 
(git.DiffForEachLineCallback, error) {
-                                       hunkNum++
-                                       return func(line git.DiffLine) error {
-                                               commitLineChange := 
&code.CommitLineChange{}
-                                               commitLineChange.CommitSha = 
curcommit.Id().String()
-                                               commitLineChange.ChangedType = 
line.Origin.String()
-                                               commitLineChange.LineNoNew = 
line.NewLineno
-                                               commitLineChange.LineNoOld = 
line.OldLineno
-                                               commitLineChange.OldFilePath = 
file.OldFile.Path
-                                               commitLineChange.NewFilePath = 
file.NewFile.Path
-                                               commitLineChange.HunkNum = 
hunkNum
-                                               commitLineChange.Id = 
curcommit.Id().String() + ":" + file.NewFile.Path + ":" + 
strconv.Itoa(line.OldLineno) + ":" + strconv.Itoa(line.NewLineno)
-                                               if line.Origin == 
git.DiffLineAddition {
-                                                       added = append(added, 
line)
-                                               } else if line.Origin == 
git.DiffLineDeletion {
-                                                       fb := 
snapshot[file.OldFile.Path]
-                                                       l := 
fb.Find(line.OldLineno)
-                                                       if l != nil && l.Value 
!= nil {
-                                                               temp := 
snapshot[file.OldFile.Path].Find(line.OldLineno)
-                                                               
commitLineChange.PrevCommit = temp.Value.(string)
-                                                       } else {
-                                                               
r.logger.Info("err", file.OldFile.Path, line.OldLineno, curcommit.Id().String())
-                                                       }
-                                                       deleted = 
append(deleted, line)
-                                               }
-                                               err = 
r.store.CommitLineChange(commitLineChange)
-                                               if err != nil {
-                                                       return 
errors.Convert(err)
-                                               }
-                                               return nil
-                                       }, nil
-                               }, nil
-                       }, git.DiffDetailLines)
-                       if err != nil {
-                               return errors.Convert(err)
-                       }
-                       //finally,process the last file in diff
-                       updateSnapshotFileBlame(curcommit, deleted, added, 
lastFile, snapshot)
-               }
-       }
-       r.logger.Info("line change collect success")
-       db := subtaskCtx.GetDal()
-       err := db.Delete(&code.RepoSnapshot{}, dal.Where("repo_id= ?", r.id))
-       if err != nil {
-               return errors.Convert(err)
-       }
-       for fp := range snapshot {
-               temp := snapshot[fp]
-               count := 0
-               for e := temp.Lines.Front(); e != nil; e = e.Next() {
-                       count++
-                       snapshotLine := &code.RepoSnapshot{}
-                       snapshotLine.RepoId = r.id
-                       snapshotLine.LineNo = count
-                       snapshotLine.CommitSha = e.Value.(string)
-                       snapshotLine.FilePath = fp
-                       err := r.store.RepoSnapshot(snapshotLine)
-                       if err != nil {
-                               r.logger.Info("error")
-                               return err
-                       }
-               }
-
-       }
-
-       r.logger.Info("collect snapshot finished")
-       return nil
-}
-
-func updateSnapshotFileBlame(currentCommit *git.Commit, deleted 
models.DiffLines, added models.DiffLines, lastFile string, snapshot 
map[string]*models.FileBlame) {
-       sort.Sort(deleted)
-       for _, line := range deleted {
-               snapshot[lastFile].RemoveLine(line.OldLineno)
-       }
-       for _, line := range added {
-               snapshot[lastFile].AddLine(line.NewLineno, 
currentCommit.Id().String())
-       }
-}
+       CountTags(ctx context.Context) (int, error)
+       CountBranches(ctx context.Context) (int, error)
+       CountCommits(ctx context.Context) (int, error)
 
-func getDiffOpts() (*git.DiffOptions, errors.Error) {
-       opts, err := git.DefaultDiffOptions()
-       if err != nil {
-               return nil, errors.Convert(err)
-       }
-       opts.NotifyCallback = func(diffSoFar *git.Diff, delta git.DiffDelta, 
matchedPathSpec string) error {
-               return nil
-       }
-       return &opts, nil
+       CollectTags(subtaskCtx plugin.SubTaskContext) error
+       CollectBranches(subtaskCtx plugin.SubTaskContext) error
+       CollectCommits(subtaskCtx plugin.SubTaskContext) error
+       CollectDiffLine(subtaskCtx plugin.SubTaskContext) error
 }
diff --git a/backend/plugins/gitextractor/parser/repo_creator.go 
b/backend/plugins/gitextractor/parser/repo_creator.go
index 0d6ec2e4a..8fef793d8 100644
--- a/backend/plugins/gitextractor/parser/repo_creator.go
+++ b/backend/plugins/gitextractor/parser/repo_creator.go
@@ -21,12 +21,14 @@ import (
        "github.com/apache/incubator-devlake/core/errors"
        "github.com/apache/incubator-devlake/core/log"
        "github.com/apache/incubator-devlake/plugins/gitextractor/models"
+       gogit "github.com/go-git/go-git/v5"
        git "github.com/libgit2/git2go/v33"
 )
 
 const (
-       BRANCH = "BRANCH"
-       TAG    = "TAG"
+       BRANCH      = "BRANCH"
+       TAG         = "TAG"
+       EnableGoGit = true
 )
 
 type GitRepoCreator struct {
@@ -59,3 +61,21 @@ func (l *GitRepoCreator) newGitRepo(repoId string, repo 
*git.Repository) *GitRep
                repo:   repo,
        }
 }
+
+// LocalGoGitRepo opens the repository located at repoPath using go-git and
+// wraps it in a GoGitRepo tagged with repoId. If the repository cannot be
+// opened, the failure is converted to errors.Error and returned alongside a
+// nil repository.
+func (l *GitRepoCreator) LocalGoGitRepo(repoPath, repoId string) (*GoGitRepo, errors.Error) {
+       opened, openErr := gogit.PlainOpen(repoPath)
+       if openErr != nil {
+               return nil, errors.Convert(openErr)
+       }
+       wrapped := l.newGoGitRepo(repoId, opened)
+       return wrapped, nil
+}
+
+// newGoGitRepo builds a GoGitRepo around an already opened go-git repository,
+// carrying over the creator's logger and store and recording repoId as the
+// repository identifier.
+func (l *GitRepoCreator) newGoGitRepo(repoId string, goGitRepo *gogit.Repository) *GoGitRepo {
+       return &GoGitRepo{
+               logger: l.logger,
+               id:     repoId,
+               repo:   goGitRepo,
+               store:  l.store,
+       }
+}
diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go 
b/backend/plugins/gitextractor/parser/repo_gogit.go
new file mode 100644
index 000000000..82be9b365
--- /dev/null
+++ b/backend/plugins/gitextractor/parser/repo_gogit.go
@@ -0,0 +1,546 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package parser
+
+import (
+       "context"
+       "crypto/sha256"
+       "encoding/hex"
+       "fmt"
+       "github.com/apache/incubator-devlake/core/dal"
+       "github.com/apache/incubator-devlake/core/errors"
+       "github.com/apache/incubator-devlake/core/log"
+       "github.com/apache/incubator-devlake/core/models/domainlayer"
+       "github.com/apache/incubator-devlake/core/models/domainlayer/code"
+       "github.com/apache/incubator-devlake/core/plugin"
+       "github.com/apache/incubator-devlake/plugins/gitextractor/models"
+       gogit "github.com/go-git/go-git/v5"
+       "github.com/go-git/go-git/v5/plumbing"
+       "github.com/go-git/go-git/v5/plumbing/object"
+       "github.com/go-git/go-git/v5/plumbing/storer"
+       "regexp"
+)
+
+// GoGitRepo is a repository handle backed by go-git. Its methods read tags,
+// branches, commits and blame data from repo and hand the resulting records
+// to store.
+type GoGitRepo struct {
+       // id is written as RepoId on stored records and prefixes the ids of refs.
+       id      string
+       // logger reports storage failures.
+       logger  log.Logger
+       // store receives every collected record and is closed by Close.
+       store   models.Store
+       // repo is the underlying go-git repository.
+       repo    *gogit.Repository
+       // cleanUp is an optional hook invoked by Close; set it with SetCleanUp.
+       cleanUp func()
+}
+
+// SetCleanUp registers f as the hook invoked by Close. A nil f is ignored, so
+// a hook registered earlier stays in place. The returned error is always nil.
+func (r *GoGitRepo) SetCleanUp(f func()) error {
+       if f == nil {
+               return nil
+       }
+       r.cleanUp = f
+       return nil
+}
+
+// Close closes the store and runs the clean-up hook, if one was registered
+// with SetCleanUp.
+//
+// The hook is deferred so that it also runs when closing the store fails;
+// otherwise whatever the hook releases would be leaked on that error path.
+// ctx is currently unused. The error returned is the store's close error.
+func (r *GoGitRepo) Close(ctx context.Context) error {
+       defer func() {
+               if r.cleanUp != nil {
+                       r.cleanUp()
+               }
+       }()
+       if err := r.store.Close(); err != nil {
+               return err
+       }
+       return nil
+}
+
+// CollectAll is the main parser subtask. It resets the progress counter and
+// then runs the tag, branch, commit and diff-line collectors in that order,
+// stopping at the first one that returns an error.
+func (r *GoGitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) error {
+       subtaskCtx.SetProgress(0, -1)
+       collectors := []func(plugin.SubTaskContext) error{
+               r.CollectTags,
+               r.CollectBranches,
+               r.CollectCommits,
+               r.CollectDiffLine,
+       }
+       for _, collect := range collectors {
+               if err := collect(subtaskCtx); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+// CountTags returns the number of tag references in the repository. The walk
+// stops with ctx.Err() as soon as ctx is done.
+func (r *GoGitRepo) CountTags(ctx context.Context) (int, error) {
+       tagRefs, err := r.repo.Tags()
+       if err != nil {
+               return 0, err
+       }
+       total := 0
+       countOne := func(_ *plumbing.Reference) error {
+               select {
+               case <-ctx.Done():
+                       return ctx.Err()
+               default:
+                       total++
+                       return nil
+               }
+       }
+       if err = tagRefs.ForEach(countOne); err != nil {
+               return 0, err
+       }
+       return total, nil
+}
+
+// CountBranches returns the number of references that are local branches or
+// remote-tracking branches, leaving out any reference whose name equals the
+// name of the reference returned by the repository's Head().
+//
+// The walk stops with ctx.Err() as soon as ctx is done. Errors from listing
+// the references or from reading HEAD are returned unchanged.
+func (r *GoGitRepo) CountBranches(ctx context.Context) (int, error) {
+       refIter, err := r.repo.Storer.IterReferences()
+       if err != nil {
+               return 0, err
+       }
+       // Building the filtered iterator cannot fail, so there is nothing to check here.
+       branchIter := storer.NewReferenceFilteredIter(
+               func(ref *plumbing.Reference) bool {
+                       return ref.Name().IsBranch() || ref.Name().IsRemote()
+               }, refIter)
+
+       headRef, err := r.repo.Head()
+       if err != nil {
+               return 0, err
+       }
+       var branchesCount int
+       if err := branchIter.ForEach(func(reference *plumbing.Reference) error {
+               select {
+               case <-ctx.Done():
+                       return ctx.Err()
+               default:
+               }
+               if reference.Name() != headRef.Name() {
+                       branchesCount++
+               }
+               return nil
+       }); err != nil {
+               return 0, err
+       }
+       return branchesCount, nil
+}
+
+// CountCommits returns the number of commit objects yielded by the
+// repository's CommitObjects iterator. The walk stops with ctx.Err() as soon
+// as ctx is done.
+func (r *GoGitRepo) CountCommits(ctx context.Context) (int, error) {
+       commits, err := r.repo.CommitObjects()
+       if err != nil {
+               return 0, err
+       }
+       total := 0
+       countOne := func(_ *object.Commit) error {
+               select {
+               case <-ctx.Done():
+                       return ctx.Err()
+               default:
+                       total++
+                       return nil
+               }
+       }
+       if err = commits.ForEach(countOne); err != nil {
+               return 0, err
+       }
+       return total, nil
+}
+
+// CollectTags stores one code.Ref of type TAG for every tag reference and
+// bumps the subtask progress for each stored ref. The ref is named after the
+// full reference name and its id is "<repo id>:<reference name>".
+//
+// The walk stops with the context's error once the subtask context is done.
+func (r *GoGitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) error {
+       tagRefs, err := r.repo.Tags()
+       if err != nil {
+               return err
+       }
+       return tagRefs.ForEach(func(tagRef *plumbing.Reference) error {
+               select {
+               case <-subtaskCtx.GetContext().Done():
+                       return subtaskCtx.GetContext().Err()
+               default:
+               }
+               targetSha := tagRef.Hash().String()
+               // When the reference hash is not a commit object, resolve the
+               // reference name to a revision and record that hash instead.
+               // Any other lookup error leaves the reference hash in place.
+               if _, lookupErr := r.repo.CommitObject(tagRef.Hash()); lookupErr != nil && errors.Is(lookupErr, plumbing.ErrObjectNotFound) {
+                       resolved, resolveErr := r.repo.ResolveRevision(plumbing.Revision(tagRef.Name()))
+                       if resolveErr != nil {
+                               return resolveErr
+                       }
+                       targetSha = resolved.String()
+               }
+               if targetSha == "" {
+                       return nil
+               }
+               refName := tagRef.Name().String()
+               tagRecord := &code.Ref{
+                       DomainEntityExtended: domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, refName)},
+                       RepoId:               r.id,
+                       Name:                 refName,
+                       CommitSha:            targetSha,
+                       RefType:              TAG,
+               }
+               if storeErr := r.store.Refs(tagRecord); storeErr != nil {
+                       return storeErr
+               }
+               subtaskCtx.IncProgress(1)
+               return nil
+       })
+}
+
+// CollectBranches stores one code.Ref of type BRANCH for every reference that
+// is a local branch or a remote-tracking branch, and bumps the subtask
+// progress for each stored ref. The ref is named after the short reference
+// name, and IsDefault is set when the reference name equals the name of the
+// reference returned by the repository's Head().
+//
+// The walk stops with the context's error once the subtask context is done.
+func (r *GoGitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext) error {
+       refIter, err := r.repo.Storer.IterReferences()
+       if err != nil {
+               return err
+       }
+       // Building the filtered iterator cannot fail, so there is nothing to check here.
+       branchIter := storer.NewReferenceFilteredIter(
+               func(ref *plumbing.Reference) bool {
+                       return ref.Name().IsBranch() || ref.Name().IsRemote()
+               }, refIter)
+       headRef, err := r.repo.Head()
+       if err != nil {
+               return err
+       }
+       return branchIter.ForEach(func(ref *plumbing.Reference) error {
+               select {
+               case <-subtaskCtx.GetContext().Done():
+                       return subtaskCtx.GetContext().Err()
+               default:
+               }
+               name := ref.Name().Short()
+               sha := ref.Hash().String()
+               _, err := r.repo.CommitObject(ref.Hash())
+               if err != nil && errors.Is(err, plumbing.ErrObjectNotFound) {
+                       // handle commit sha like "0000000000000000000000000000000000000000":
+                       // resolve the reference name to a revision and use that hash instead
+                       h, err := r.repo.ResolveRevision(plumbing.Revision(ref.Name()))
+                       if err != nil {
+                               return err
+                       }
+                       sha = h.String()
+               }
+               codeRef := &code.Ref{
+                       DomainEntityExtended: domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
+                       RepoId:               r.id,
+                       Name:                 name,
+                       CommitSha:            sha,
+                       RefType:              BRANCH,
+                       IsDefault:            ref.Name() == headRef.Name(),
+               }
+               if err := r.store.Refs(codeRef); err != nil {
+                       return err
+               }
+               subtaskCtx.IncProgress(1)
+               return nil
+       })
+}
+
+// getComponentMap loads the components stored for this repository and returns
+// a map from component name to its compiled path regular expression.
+//
+// The patterns come from the database, so a pattern that does not compile is
+// reported as an error naming the component instead of panicking.
+func (r *GoGitRepo) getComponentMap(subtaskCtx plugin.SubTaskContext) (map[string]*regexp.Regexp, error) {
+       db := subtaskCtx.GetDal()
+       components := make([]code.Component, 0)
+       err := db.All(&components, dal.From(components), dal.Where("repo_id= ?", r.id))
+       if err != nil {
+               return nil, err
+       }
+       componentMap := make(map[string]*regexp.Regexp, len(components))
+       for _, component := range components {
+               pathRegex, compileErr := regexp.Compile(component.PathRegex)
+               if compileErr != nil {
+                       return nil, fmt.Errorf("compiling path regex %q of component %q: %w", component.PathRegex, component.Name, compileErr)
+               }
+               componentMap[component.Name] = pathRegex
+       }
+       return componentMap, nil
+}
+
+// CollectCommits walks every commit object of the repository. For each commit
+// it stores the parent links, the commit itself with its added/deleted line
+// totals, the repo-commit link and, unless the SkipCommitFiles option is set,
+// the per-file changes against the first parent. Progress is bumped once per
+// commit.
+//
+// The walk stops with the context's error once the subtask context is done,
+// and with the first error returned by go-git or by the store.
+func (r *GoGitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) (err error) {
+       // Load the component patterns first so a failure aborts before anything is stored.
+       componentMap, err := r.getComponentMap(subtaskCtx)
+       if err != nil {
+               return err
+       }
+       skipCommitFiles := subtaskCtx.GetConfigReader().GetBool(SkipCommitFiles)
+
+       commitIter, err := r.repo.CommitObjects()
+       if err != nil {
+               return err
+       }
+       return commitIter.ForEach(func(commit *object.Commit) error {
+               select {
+               case <-subtaskCtx.GetContext().Done():
+                       return subtaskCtx.GetContext().Err()
+               default:
+               }
+               sha := commit.Hash.String()
+               if parentsErr := r.storeParentCommits(sha, commit); parentsErr != nil {
+                       return parentsErr
+               }
+
+               fileStats, statsErr := commit.StatsContext(subtaskCtx.GetContext())
+               if statsErr != nil {
+                       return statsErr
+               }
+               record := &code.Commit{
+                       Sha:            sha,
+                       Message:        commit.Message,
+                       AuthorName:     commit.Author.Name,
+                       AuthorEmail:    commit.Author.Email,
+                       AuthorId:       commit.Author.Email,
+                       AuthoredDate:   commit.Author.When,
+                       CommitterName:  commit.Committer.Name,
+                       CommitterEmail: commit.Committer.Email,
+                       CommitterId:    commit.Committer.Email,
+                       CommittedDate:  commit.Committer.When,
+               }
+               for _, fileStat := range fileStats {
+                       record.Additions += fileStat.Addition
+                       // Known oddity: in some repos the deletion count is zero
+                       // although `git log --stat` reports deletions. Running
+                       // object.DiffTreeWithOptions by hand with other diff
+                       // algorithms gives the same result as StatsContext, and it
+                       // could not be reproduced with another repo. A similar
+                       // issue: https://github.com/go-git/go-git/issues/367
+                       record.Deletions += fileStat.Deletion
+               }
+               if storeErr := r.store.Commits(record); storeErr != nil {
+                       return storeErr
+               }
+
+               link := &code.RepoCommit{
+                       RepoId:    r.id,
+                       CommitSha: sha,
+               }
+               if storeErr := r.store.RepoCommits(link); storeErr != nil {
+                       return storeErr
+               }
+               if !skipCommitFiles {
+                       if filesErr := r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); filesErr != nil {
+                               return filesErr
+                       }
+               }
+               subtaskCtx.IncProgress(1)
+               return nil
+       })
+}
+
+// storeParentCommits stores one code.CommitParent link per parent of commit,
+// in parent order, with commitSha as the child. A nil commit is a no-op.
+//
+// The parent hashes are read from commit.ParentHashes, which the commit
+// already carries, so no parent commit object has to be loaded from storage
+// and a parent object that is absent from the object store does not fail the
+// collection. The store's error is returned unchanged.
+func (r *GoGitRepo) storeParentCommits(commitSha string, commit *object.Commit) error {
+       if commit == nil {
+               return nil
+       }
+       // Left nil when the commit has no parents, so the store still receives a nil slice.
+       var commitParents []*code.CommitParent
+       for _, parentHash := range commit.ParentHashes {
+               commitParents = append(commitParents, &code.CommitParent{
+                       CommitSha:       commitSha,
+                       ParentCommitSha: parentHash.String(),
+               })
+       }
+       return r.store.CommitParents(commitParents)
+}
+
+// getCurrentAndParentTree returns the tree of commit and the tree of its
+// first parent. The parent tree is nil when the commit has no parents.
+//
+// ctx bounds the stats computation that runs before the trees are read. Any
+// error from go-git is returned unchanged, with both trees nil.
+func (r *GoGitRepo) getCurrentAndParentTree(ctx context.Context, commit *object.Commit) (*object.Tree, *object.Tree, error) {
+       // NOTE(review): the stats themselves are discarded; the call is kept only
+       // because its error is propagated. It now honours ctx so a cancelled
+       // subtask does not sit through a full diff. Consider dropping it if the
+       // callers do not rely on that early failure.
+       if _, err := commit.StatsContext(ctx); err != nil {
+               return nil, nil, err
+       }
+       commitTree, err := commit.Tree()
+       if err != nil {
+               return nil, nil, err
+       }
+       if commit.NumParents() == 0 {
+               return commitTree, nil, nil
+       }
+       // Parent(0) fetches the first parent directly, leaving no iterator open.
+       firstParent, err := commit.Parent(0)
+       if err != nil {
+               return nil, nil, err
+       }
+       firstParentTree, err := firstParent.Tree()
+       if err != nil {
+               return nil, nil, err
+       }
+       return commitTree, firstParentTree, nil
+}
+
+// storeDiffCommitFilesComparedToParent diffs commit against its first parent
+// and, for every file reported by the patch stats, stores the file's component
+// assignment followed by a code.CommitFile carrying its added and deleted
+// line counts.
+//
+// A failure to store a commit file is logged and returned, so the caller
+// learns that the files of this commit were only partly written.
+func (r *GoGitRepo) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit) (err error) {
+       commitTree, firstParentTree, err := r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit)
+       if err != nil {
+               return err
+       }
+       // For a commit without parents firstParentTree is nil and the patch is
+       // still requested on it. NOTE(review): this relies on go-git treating a
+       // nil *object.Tree as an empty tree - confirm.
+       patch, err := firstParentTree.PatchContext(subtaskCtx.GetContext(), commitTree)
+       if err != nil {
+               return err
+       }
+       commitSha := commit.Hash.String()
+       for _, p := range patch.Stats() {
+               commitFile := &code.CommitFile{
+                       CommitSha: commitSha,
+               }
+               commitFile.FilePath = p.Name
+               commitFile.Id = genCommitFileId(commitFile.CommitSha, p.Name)
+               commitFile.Deletions = p.Deletion
+               commitFile.Additions = p.Addition
+               if err := r.storeCommitFileComponents(subtaskCtx, componentMap, commitFile.Id, commitFile.FilePath); err != nil {
+                       return err
+               }
+               if err := r.store.CommitFiles(commitFile); err != nil {
+                       r.logger.Error(err, "CommitFiles error")
+                       return err
+               }
+       }
+       return nil
+}
+
+// genCommitFileId builds the id of a commit file as
+// "<commitSha>:<hex sha256 of filePath>".
+//
+// Hashing the path keeps the id at a fixed length, because with long paths a
+// varchar(255) column was not enough to hold an id containing the raw path.
+func genCommitFileId(commitSha, filePath string) string {
+       pathDigest := sha256.Sum256([]byte(filePath))
+       return commitSha + ":" + hex.EncodeToString(pathDigest[:])
+}
+
+// storeCommitFileComponents stores which component a commit file belongs to.
+// The component is the name of a pattern in componentMap that matches
+// commitFilePath, or "Default" when none matches.
+//
+// An error is returned when commitFileId or commitFilePath is empty, or when
+// the store fails. subtaskCtx is currently unused.
+//
+// NOTE(review): componentMap is ranged in Go's unspecified map order, so when
+// several patterns match the same path the chosen component can differ
+// between runs.
+func (r *GoGitRepo) storeCommitFileComponents(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commitFileId string, commitFilePath string) error {
+       if commitFileId == "" || commitFilePath == "" {
+               return errors.Default.New("commit file id or commit file path is empty")
+       }
+       commitFileComponent := &code.CommitFileComponent{
+               CommitFileId:  commitFileId,
+               ComponentName: "Default",
+       }
+       for component, reg := range componentMap {
+               if reg.MatchString(commitFilePath) {
+                       commitFileComponent.ComponentName = component
+                       break
+               }
+       }
+       return r.store.CommitFileComponents(commitFileComponent)
+}
+
+// storeRepoSnapshot blames every file touched by the commits in commitList
+// and stores one code.RepoSnapshot per line of the last blame taken for each
+// file. Later commits overwrite the blame of earlier ones, so the result
+// depends on the order of commitList.
+//
+// NOTE(review): files reported by the patch stats are blamed at the commit
+// being processed; whether that works for a file the commit deleted has not
+// been checked here.
+func (r *GoGitRepo) storeRepoSnapshot(subtaskCtx plugin.SubTaskContext, commitList []*object.Commit) error {
+       ctx := subtaskCtx.GetContext()
+       // file path -> blamed commit sha per line; index 0 holds line 1
+       snapshot := make(map[string][]string)
+       for _, commit := range commitList {
+               commitTree, firstParentTree, treeErr := r.getCurrentAndParentTree(ctx, commit)
+               if treeErr != nil {
+                       return treeErr
+               }
+               patch, patchErr := firstParentTree.PatchContext(subtaskCtx.GetContext(), commitTree)
+               if patchErr != nil {
+                       return patchErr
+               }
+               for _, fileStat := range patch.Stats() {
+                       blame, blameErr := gogit.Blame(commit, fileStat.Name)
+                       if blameErr != nil {
+                               return blameErr
+                       }
+                       var lineShas []string
+                       for _, line := range blame.Lines {
+                               lineShas = append(lineShas, line.Hash.String())
+                       }
+                       snapshot[fileStat.Name] = lineShas
+               }
+       }
+       // store snapshots
+       for filePath, lineShas := range snapshot {
+               for i := range lineShas {
+                       record := &code.RepoSnapshot{
+                               RepoId:    r.id,
+                               CommitSha: lineShas[i],
+                               FilePath:  filePath,
+                               LineNo:    i + 1,
+                       }
+                       if storeErr := r.store.RepoSnapshot(record); storeErr != nil {
+                               r.logger.Error(storeErr, "store RepoSnapshot error")
+                               return storeErr
+                       }
+               }
+       }
+       return nil
+}
+
+// GetCommitList returns the first-parent history of HEAD, oldest commit first
+// and the HEAD commit last.
+//
+// The walk follows parent 0 of each commit until a commit without parents is
+// reached. It stops with the context's error once the subtask context is
+// done; errors from go-git are returned unchanged.
+func (r *GoGitRepo) GetCommitList(subtaskCtx plugin.SubTaskContext) ([]*object.Commit, error) {
+       headRef, err := r.repo.Head()
+       if err != nil {
+               return nil, err
+       }
+       commit, err := r.repo.CommitObject(headRef.Hash())
+       if err != nil {
+               return nil, err
+       }
+       commitList := []*object.Commit{commit}
+       for commit.NumParents() > 0 {
+               // The history can be long, so honour cancellation like the other collectors.
+               select {
+               case <-subtaskCtx.GetContext().Done():
+                       return nil, subtaskCtx.GetContext().Err()
+               default:
+               }
+               // Parent(0) already returns the parent commit object; no second lookup is needed.
+               commit, err = commit.Parent(0)
+               if err != nil {
+                       return nil, err
+               }
+               commitList = append(commitList, commit)
+       }
+       // reverse commitList so the oldest commit comes first
+       // use slices.Reverse(commitList) in higher golang version.
+       for i, j := 0, len(commitList)-1; i < j; i, j = i+1, j-1 {
+               commitList[i], commitList[j] = commitList[j], commitList[i]
+       }
+       return commitList, nil
+}
+
+// CollectDiffLine builds the commit list of HEAD with GetCommitList and stores
+// the repository snapshot derived from it.
+//
+// fixme: collecting CommitLineChange is not implemented.
+// There is no way to get such information with go-git, and table
+// commit_line_change is not used by any dashboards, so it is ignored.
+func (r *GoGitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext) error {
+       history, listErr := r.GetCommitList(subtaskCtx)
+       if listErr != nil {
+               return listErr
+       }
+       return r.storeRepoSnapshot(subtaskCtx, history)
+}
diff --git a/backend/plugins/gitextractor/parser/repo.go 
b/backend/plugins/gitextractor/parser/repo_libgit2.go
similarity index 96%
copy from backend/plugins/gitextractor/parser/repo.go
copy to backend/plugins/gitextractor/parser/repo_libgit2.go
index 1a3a90d22..43df1401e 100644
--- a/backend/plugins/gitextractor/parser/repo.go
+++ b/backend/plugins/gitextractor/parser/repo_libgit2.go
@@ -22,10 +22,6 @@ import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
-       "regexp"
-       "sort"
-       "strconv"
-
        "github.com/apache/incubator-devlake/core/config"
        "github.com/apache/incubator-devlake/core/dal"
        "github.com/apache/incubator-devlake/core/errors"
@@ -34,6 +30,9 @@ import (
        "github.com/apache/incubator-devlake/core/models/domainlayer/code"
        "github.com/apache/incubator-devlake/core/plugin"
        "github.com/apache/incubator-devlake/plugins/gitextractor/models"
+       "regexp"
+       "sort"
+       "strconv"
 
        git "github.com/libgit2/git2go/v33"
 )
@@ -43,15 +42,23 @@ const SkipCommitFiles = "SKIP_COMMIT_FILES"
 var TypeNotMatchError = "the requested type does not match the type in the ODB"
 
 type GitRepo struct {
+       id     string
+       logger log.Logger
+
        store   models.Store
-       logger  log.Logger
-       id      string
        repo    *git.Repository
        cleanup func()
 }
 
+func (r *GitRepo) SetCleanUp(f func()) error {
+       if f != nil {
+               r.cleanup = f
+       }
+       return nil
+}
+
 // CollectAll The main parser subtask
-func (r *GitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) errors.Error {
+func (r *GitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) error {
        subtaskCtx.SetProgress(0, -1)
        err := r.CollectTags(subtaskCtx)
        if err != nil {
@@ -69,7 +76,7 @@ func (r *GitRepo) CollectAll(subtaskCtx 
plugin.SubTaskContext) errors.Error {
 }
 
 // Close resources
-func (r *GitRepo) Close() errors.Error {
+func (r *GitRepo) Close(ctx context.Context) error {
        defer func() {
                if r.cleanup != nil {
                        r.cleanup()
@@ -79,7 +86,7 @@ func (r *GitRepo) Close() errors.Error {
 }
 
 // CountTags Count git tags subtask
-func (r *GitRepo) CountTags() (int, errors.Error) {
+func (r *GitRepo) CountTags(ctx context.Context) (int, error) {
        tags, err := r.repo.Tags.List()
        if err != nil {
                return 0, errors.Convert(err)
@@ -88,7 +95,7 @@ func (r *GitRepo) CountTags() (int, errors.Error) {
 }
 
 // CountBranches count the number of branches in a git repo
-func (r *GitRepo) CountBranches(ctx context.Context) (int, errors.Error) {
+func (r *GitRepo) CountBranches(ctx context.Context) (int, error) {
        var branchIter *git.BranchIterator
        branchIter, err := r.repo.NewBranchIterator(git.BranchAll)
        if err != nil {
@@ -113,7 +120,7 @@ func (r *GitRepo) CountBranches(ctx context.Context) (int, 
errors.Error) {
 }
 
 // CountCommits count the number of commits in a git repo
-func (r *GitRepo) CountCommits(ctx context.Context) (int, errors.Error) {
+func (r *GitRepo) CountCommits(ctx context.Context) (int, error) {
        odb, err := r.repo.Odb()
        if err != nil {
                return 0, errors.Convert(err)
@@ -138,7 +145,7 @@ func (r *GitRepo) CountCommits(ctx context.Context) (int, 
errors.Error) {
 }
 
 // CollectTags Collect Tags data
-func (r *GitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) errors.Error {
+func (r *GitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) error {
        return errors.Convert(r.repo.Tags.Foreach(func(name string, id 
*git.Oid) error {
                select {
                case <-subtaskCtx.GetContext().Done():
@@ -157,7 +164,7 @@ func (r *GitRepo) CollectTags(subtaskCtx 
plugin.SubTaskContext) errors.Error {
                } else {
                        tagCommit = id.String()
                }
-               r.logger.Info("tagCommit:%s", tagCommit)
+               r.logger.Debug("tagCommit: %s", tagCommit)
                if tagCommit != "" {
                        ref := &code.Ref{
                                DomainEntityExtended: 
domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
@@ -177,7 +184,7 @@ func (r *GitRepo) CollectTags(subtaskCtx 
plugin.SubTaskContext) errors.Error {
 }
 
 // CollectBranches Collect branch data
-func (r *GitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext) 
errors.Error {
+func (r *GitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext) error {
        var repoInter *git.BranchIterator
        repoInter, err := r.repo.NewBranchIterator(git.BranchAll)
        if err != nil {
@@ -205,6 +212,7 @@ func (r *GitRepo) CollectBranches(subtaskCtx 
plugin.SubTaskContext) errors.Error
                                CommitSha:            sha,
                                RefType:              BRANCH,
                        }
+                       // commit sha may be an empty string, it's unexpected.
                        ref.IsDefault, err1 = branch.IsHead()
                        if err1 != nil && err1.Error() != TypeNotMatchError {
                                return err1
@@ -221,7 +229,7 @@ func (r *GitRepo) CollectBranches(subtaskCtx 
plugin.SubTaskContext) errors.Error
 }
 
 // CollectCommits Collect data from each commit, we can also get the diff line
-func (r *GitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) 
errors.Error {
+func (r *GitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) error {
        opts, err := getDiffOpts()
        if err != nil {
                return err
@@ -285,8 +293,10 @@ func (r *GitRepo) CollectCommits(subtaskCtx 
plugin.SubTaskContext) errors.Error
                if stats, err = r.getDiffComparedToParent(c.Sha, commit, 
parent, opts, componentMap); err != nil {
                        return err
                }
+               r.logger.Debug("state: %#+v\n", stats.Deletions())
                c.Additions += stats.Insertions()
                c.Deletions += stats.Deletions()
+
                err = r.store.Commits(c)
                if err != nil {
                        return err
@@ -418,14 +428,14 @@ func (r *GitRepo) storeCommitFilesFromDiff(commitSha 
string, diff *git.Diff, com
 }
 
 // CollectDiffLine get line diff data from a specific branch
-func (r *GitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext) 
errors.Error {
+func (r *GitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext) error {
        //Using this subtask,we can get every line change in every commit.
        //We maintain a snapshot structure to get which commit each deleted 
line belongs to
        snapshot := make(map[string] /*file path*/ *models.FileBlame)
        repo := r.repo
        //step 1. get the reverse commit list
        commitList := make([]git.Commit, 0)
-       //get currently head commitsha, dafault is master branch
+       // get current head commit sha, default is master branch
        // check branch, if not master, checkout to branch's head
        commitOid, err1 := repo.Head()
        if err1 != nil && err1.Error() != TypeNotMatchError {
@@ -485,7 +495,7 @@ func (r *GitRepo) CollectDiffLine(subtaskCtx 
plugin.SubTaskContext) errors.Error
                        var lastFile string
                        lastFile = ""
                        err = diff.ForEach(func(file git.DiffDelta, progress 
float64) (git.DiffForEachHunkCallback, error) {
-                               //if doesn't exist in snapshot, create a new one
+                               // if it doesn't exist in snapshot, create a 
new one
                                if _, ok := snapshot[file.OldFile.Path]; !ok {
                                        fileBlame, err := models.NewFileBlame()
                                        if err != nil {
@@ -498,7 +508,7 @@ func (r *GitRepo) CollectDiffLine(subtaskCtx 
plugin.SubTaskContext) errors.Error
                                        lastFile = file.NewFile.Path
                                } else if lastFile != file.NewFile.Path {
                                        updateSnapshotFileBlame(curcommit, 
deleted, added, lastFile, snapshot)
-                                       //reset the deleted and added,last_file 
now is current file
+                                       // reset the deleted and 
added,last_file now is current file
                                        deleted = make([]git.DiffLine, 0)
                                        added = make([]git.DiffLine, 0)
                                        lastFile = file.NewFile.Path
diff --git a/backend/plugins/gitextractor/parser/repo_test.go 
b/backend/plugins/gitextractor/parser/repo_test.go
new file mode 100644
index 000000000..205bfc783
--- /dev/null
+++ b/backend/plugins/gitextractor/parser/repo_test.go
@@ -0,0 +1,302 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package parser
+
+import (
+       "context"
+       "fmt"
+       "github.com/apache/incubator-devlake/core/errors"
+       "github.com/apache/incubator-devlake/core/log"
+       "github.com/apache/incubator-devlake/impls/logruslog"
+       "github.com/apache/incubator-devlake/plugins/gitextractor/models"
+       "github.com/apache/incubator-devlake/plugins/gitextractor/store"
+       "github.com/stretchr/testify/assert"
+       "os"
+       "path/filepath"
+       "testing"
+)
+
+var (
+       enableRepoTest                = false
+       repoId                        = "test-repo-id"
+       runInLocal                    = true
+       ctx                           = context.Background()
+       subTaskCtx                    = &testSubTaskContext{}
+       repoIncubatorDevlakeRemoteURL = 
"https://github.com/apache/incubator-devlake";
+       output                        = "./output"
+
+       repoIncubatorApacheDevlakeWebsite = ""
+       simpleRepo                        = ""
+
+       logger log.Logger
+
+       storage        models.Store
+       gitRepoCreator *GitRepoCreator
+
+       goGitStorage     models.Store
+       goGitRepoCreator *GitRepoCreator
+)
+
+func TestMain(m *testing.M) {
+       if !enableRepoTest {
+               return
+       }
+       fmt.Println("test main starts")
+
+       if path, err := os.Getwd(); err != nil {
+               panic(err)
+       } else {
+               repoIncubatorApacheDevlakeWebsite = filepath.Join(path, 
"test_repo", "website")
+               simpleRepo = filepath.Join(path, "test_repo", "demo")
+               fmt.Println("simple repo and website repo path have been 
initialised.")
+       }
+
+       logger = logruslog.Global.Nested("git extractor")
+       fmt.Println("logger inited")
+
+       clearOutput()
+
+       var err error
+       storage, err = store.NewCsvStore(output + "_libgit2")
+       if err != nil {
+               panic(err)
+       }
+       defer storage.Close()
+       fmt.Println("git storage inited")
+       gitRepoCreator = NewGitRepoCreator(storage, logger)
+
+       goGitStorage, err = store.NewCsvStore(output + "_gogit")
+       if err != nil {
+               panic(err)
+       }
+       defer goGitStorage.Close()
+       fmt.Println("go git storage inited")
+       goGitRepoCreator = NewGitRepoCreator(goGitStorage, logger)
+
+       fmt.Printf("test main run success\n\tlogger: %+v\tstorage: %+v\tgogit 
storage: %+v\n", logger, storage, goGitStorage)
+       m.Run()
+}
+
+func getRepos(localRepoDir string) (RepoCollector, RepoCollector) {
+       var gitRepo RepoCollector
+       var goGitRepo RepoCollector
+       var err errors.Error
+
+       if runInLocal {
+               repoPath := localRepoDir
+               gitRepo, err = gitRepoCreator.LocalRepo(repoPath, repoId)
+               if err != nil {
+                       panic(err)
+               }
+               goGitRepo, err = goGitRepoCreator.LocalGoGitRepo(repoPath, 
repoId)
+               if err != nil {
+                       panic(err)
+               }
+       } else {
+               gitRepo, err = gitRepoCreator.CloneOverHTTP(subTaskCtx, repoId, 
repoIncubatorDevlakeRemoteURL, "", "", "")
+               if err != nil {
+                       panic(err)
+               }
+               goGitRepo, err = 
goGitRepoCreator.CloneGoGitRepoOverHTTP(subTaskCtx, repoId, 
repoIncubatorDevlakeRemoteURL, "", "", "")
+               if err != nil {
+                       panic(err)
+               }
+       }
+       return goGitRepo, gitRepo
+}
+
+func TestGitRepo_CountRepoInfo(t *testing.T) {
+       if !enableRepoTest {
+               return
+       }
+       goGitRepo, gitRepo := getRepos(repoIncubatorApacheDevlakeWebsite)
+
+       {
+               tagsCount1, err1 := gitRepo.CountTags(ctx)
+               if err1 != nil {
+                       panic(err1)
+               }
+               tagsCount2, err2 := goGitRepo.CountTags(ctx)
+               if err2 != nil {
+                       panic(err2)
+               }
+               t.Logf("[tagsCount] libgit2 result: %d, gogit result: %d", 
tagsCount1, tagsCount2)
+               assert.Equalf(t, tagsCount1, tagsCount2, "unexpected")
+       }
+
+       {
+               branchesCount1, err1 := gitRepo.CountBranches(ctx)
+               if err1 != nil {
+                       panic(err1)
+               }
+               branchesCount2, err2 := goGitRepo.CountBranches(ctx)
+               if err2 != nil {
+                       panic(err2)
+               }
+               t.Logf("[branchesCount] libgit2 result: %d, gogit result: %d", 
branchesCount1, branchesCount2)
+               assert.Equalf(t, branchesCount1, branchesCount2, "unexpected")
+       }
+
+       {
+               commitCount1, err1 := gitRepo.CountCommits(ctx)
+               if err1 != nil {
+                       panic(err1)
+               }
+               commitCount2, err2 := goGitRepo.CountCommits(ctx)
+               if err2 != nil {
+                       panic(err2)
+               }
+               t.Logf("[commitCount] libgit2 result: %d, gogit result: %d", 
commitCount1, commitCount2)
+               assert.Equalf(t, commitCount1, commitCount2, "unexpected")
+       }
+
+}
+
+// all tests pass
+func TestGitRepo_CollectRepoInfo(t *testing.T) {
+       if !enableRepoTest {
+               return
+       }
+       goGitRepo, gitRepo := getRepos(simpleRepo)
+
+       {
+               // finished
+               subTaskCtxCollectTags := &testSubTaskContext{}
+               if err1 := gitRepo.CollectTags(subTaskCtxCollectTags); err1 != 
nil {
+                       panic(err1)
+               }
+               subTaskCtxCollectTagsWithGoGit := &testSubTaskContext{}
+               if err2 := 
goGitRepo.CollectTags(subTaskCtxCollectTagsWithGoGit); err2 != nil {
+                       panic(err2)
+               }
+               t.Logf("[CollectTags] libgit2 result: %+v, gogit result: %+v", 
subTaskCtxCollectTags, subTaskCtxCollectTagsWithGoGit)
+               assert.Equalf(t, subTaskCtxCollectTags.total, 
subTaskCtxCollectTagsWithGoGit.total, "unexpected")
+       }
+
+       {
+               // finished
+               subTaskCtxCollectBranches := &testSubTaskContext{}
+               if err1 := gitRepo.CollectBranches(subTaskCtxCollectBranches); 
err1 != nil {
+                       panic(err1)
+               }
+               subTaskCtxCollectBranchesWithGoGit := &testSubTaskContext{}
+               if err2 := 
goGitRepo.CollectBranches(subTaskCtxCollectBranchesWithGoGit); err2 != nil {
+                       panic(err2)
+               }
+               t.Logf("[CollectBranches] libgit2 result: %+v, gogit result: 
%+v", subTaskCtxCollectBranches, subTaskCtxCollectBranchesWithGoGit)
+               assert.Equalf(t, subTaskCtxCollectBranches.total, 
subTaskCtxCollectBranchesWithGoGit.total, "unexpected")
+       }
+
+       {
+               subTaskCtxCollectCommits := &testSubTaskContext{}
+               if err1 := gitRepo.CollectCommits(subTaskCtxCollectCommits); 
err1 != nil {
+                       panic(err1)
+               }
+               subTaskCtxCCollectCommitsWithGoGit := &testSubTaskContext{}
+               if err2 := 
goGitRepo.CollectCommits(subTaskCtxCCollectCommitsWithGoGit); err2 != nil {
+                       panic(err2)
+               }
+
+               t.Logf("[CollectCommits] libgit2 result: %+v, gogit result: 
%+v", subTaskCtxCollectCommits, subTaskCtxCCollectCommitsWithGoGit)
+               fmt.Println(subTaskCtxCollectCommits.total, 
subTaskCtxCCollectCommitsWithGoGit.total)
+               assert.Equalf(t, subTaskCtxCollectCommits.total, 
subTaskCtxCCollectCommitsWithGoGit.total, "unexpected")
+       }
+
+       {
+               subTaskCtxCollectDiffLine := &testSubTaskContext{}
+               if err1 := gitRepo.CollectDiffLine(subTaskCtxCollectDiffLine); 
err1 != nil {
+                       panic(err1)
+               }
+               subTaskCtxCollectDiffLineWithGoGit := &testSubTaskContext{}
+               if err2 := 
goGitRepo.CollectDiffLine(subTaskCtxCollectDiffLineWithGoGit); err2 != nil {
+                       panic(err2)
+               }
+
+               t.Logf("[CollectDiffLine] libgit2 result: %+v, gogit result: 
%+v", subTaskCtxCollectDiffLine, subTaskCtxCollectDiffLineWithGoGit)
+               fmt.Println(subTaskCtxCollectDiffLine.total, 
subTaskCtxCollectDiffLineWithGoGit.total)
+               assert.Equalf(t, subTaskCtxCollectDiffLine.total, 
subTaskCtxCollectDiffLineWithGoGit.total, "unexpected")
+       }
+}
+
+func clearOutput() { // removes the CSV directories TestMain creates: output+"_libgit2" and output+"_gogit"
+       os.RemoveAll(output + "_libgit2")
+       os.RemoveAll(output + "_gogit")
+}
+
+func TestGitRepo_CollectCommits(t *testing.T) {
+       if !enableRepoTest {
+               return
+       }
+       repoPath := simpleRepo
+       gitRepo, err := gitRepoCreator.LocalRepo(repoPath, repoId)
+       if err != nil {
+               panic(err)
+       }
+       goGitRepo, err := goGitRepoCreator.LocalGoGitRepo(repoPath, repoId)
+       if err != nil {
+               panic(err)
+       }
+
+       {
+               subTaskCtxCollectCommits := &testSubTaskContext{}
+               if err1 := gitRepo.CollectCommits(subTaskCtxCollectCommits); 
err1 != nil {
+                       panic(err1)
+               }
+
+               subTaskCtxCCollectCommitsWithGoGit := &testSubTaskContext{}
+               if err2 := 
goGitRepo.CollectCommits(subTaskCtxCCollectCommitsWithGoGit); err2 != nil {
+                       panic(err2)
+               }
+
+               t.Logf("[CollectCommits] libgit2 result: %+v, gogit result: 
%+v", subTaskCtxCollectCommits, subTaskCtxCCollectCommitsWithGoGit)
+               fmt.Println(subTaskCtxCollectCommits.total, 
subTaskCtxCCollectCommitsWithGoGit.total)
+               assert.Equalf(t, subTaskCtxCollectCommits.total, 
subTaskCtxCCollectCommitsWithGoGit.total, "unexpected")
+       }
+}
+
+func TestGitRepo_CollectDiffLine(t *testing.T) { // compares libgit2 and go-git CollectDiffLine totals on simpleRepo
+       if !enableRepoTest {
+               return
+       }
+       repoPath := simpleRepo
+       gitRepo, err := gitRepoCreator.LocalRepo(repoPath, repoId)
+       if err != nil {
+               panic(err)
+       }
+       goGitRepo, err := goGitRepoCreator.LocalGoGitRepo(repoPath, repoId)
+       if err != nil {
+               panic(err)
+       }
+
+       {
+               subTaskCtxCollectDiffLine := &testSubTaskContext{}
+               if err1 := gitRepo.CollectDiffLine(subTaskCtxCollectDiffLine); 
err1 != nil {
+                       panic(err1)
+               }
+               // the libgit2 result is logged together with the go-git result below
+
+               subTaskCtxCollectDiffLineWithGoGit := &testSubTaskContext{}
+               if err2 := 
goGitRepo.CollectDiffLine(subTaskCtxCollectDiffLineWithGoGit); err2 != nil {
+                       panic(err2)
+               }
+
+               t.Logf("[CollectDiffLine] libgit2 result: %+v, gogit result: 
%+v", subTaskCtxCollectDiffLine, subTaskCtxCollectDiffLineWithGoGit)
+               fmt.Println(subTaskCtxCollectDiffLine.total, 
subTaskCtxCollectDiffLineWithGoGit.total)
+               assert.Equalf(t, subTaskCtxCollectDiffLine.total, 
subTaskCtxCollectDiffLineWithGoGit.total, "unexpected")
+       }
+}
diff --git a/backend/plugins/gitextractor/store/csv.go 
b/backend/plugins/gitextractor/store/csv.go
index 7d26008f6..8701501c5 100644
--- a/backend/plugins/gitextractor/store/csv.go
+++ b/backend/plugins/gitextractor/store/csv.go
@@ -190,5 +190,11 @@ func (c *CsvStore) Close() errors.Error {
        if c.snapshotWriter != nil {
                c.snapshotWriter.Close()
        }
+       if c.commitFileComponentWriter != nil {
+               c.commitFileComponentWriter.Close()
+       }
+       if c.commitLineChangeWriter != nil {
+               c.commitLineChangeWriter.Close()
+       }
        return nil
 }
diff --git a/backend/plugins/gitextractor/tasks/clone.go 
b/backend/plugins/gitextractor/tasks/clone.go
index 87479c328..0b88e2ff6 100644
--- a/backend/plugins/gitextractor/tasks/clone.go
+++ b/backend/plugins/gitextractor/tasks/clone.go
@@ -28,6 +28,8 @@ import (
        "strings"
 )
 
+const useGoGitInGitExtractor = "USE_GO_GIT_IN_GIT_EXTRACTOR"
+
 var CloneGitRepoMeta = plugin.SubTaskMeta{
        Name:             "cloneGitRepo",
        EntryPoint:       CloneGitRepo,
@@ -37,6 +39,16 @@ var CloneGitRepoMeta = plugin.SubTaskMeta{
        DomainTypes:      []string{plugin.DOMAIN_TYPE_CODE},
 }
 
+func useGoGit(subTaskCtx plugin.SubTaskContext, taskData 
*GitExtractorTaskData) bool {
+       if subTaskCtx.GetConfigReader().GetBool(useGoGitInGitExtractor) {
+               return true
+       }
+       if taskData != nil && taskData.Options.UseGoGit {
+               return true
+       }
+       return false
+}
+
 func CloneGitRepo(subTaskCtx plugin.SubTaskContext) errors.Error {
        taskData, ok := subTaskCtx.GetData().(*GitExtractorTaskData)
        if !ok {
@@ -44,7 +56,16 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext) 
errors.Error {
        }
        op := taskData.Options
        storage := store.NewDatabase(subTaskCtx, op.RepoId)
-       repo, err := NewGitRepo(subTaskCtx, subTaskCtx.GetLogger(), storage, op)
+       var repo parser.RepoCollector
+       var err errors.Error
+       logger := subTaskCtx.GetLogger()
+       if useGoGit(subTaskCtx, taskData) {
+               logger.Info("use go-git in gitextractor")
+               repo, err = NewGoGitRepo(subTaskCtx, logger, storage, op)
+       } else {
+               logger.Info("use libgit2 in gitextractor")
+               repo, err = NewGitRepo(subTaskCtx, logger, storage, op)
+       }
        if err != nil {
                return err
        }
@@ -54,9 +75,9 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext) 
errors.Error {
 }
 
 // NewGitRepo create and return a new parser git repo
-func NewGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage 
models.Store, op *GitExtractorOptions) (*parser.GitRepo, errors.Error) {
+func NewGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage 
models.Store, op *GitExtractorOptions) (parser.RepoCollector, errors.Error) {
        var err errors.Error
-       var repo *parser.GitRepo
+       var repo parser.RepoCollector
        p := parser.NewGitRepoCreator(storage, logger)
        if strings.HasPrefix(op.Url, "http") {
                repo, err = p.CloneOverHTTP(ctx, op.RepoId, op.Url, op.User, 
op.Password, op.Proxy)
@@ -69,3 +90,20 @@ func NewGitRepo(ctx plugin.SubTaskContext, logger 
log.Logger, storage models.Sto
        }
        return repo, err
 }
+
+// NewGoGitRepo creates and returns a new parser git repo using go-git
+func NewGoGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage 
models.Store, op *GitExtractorOptions) (parser.RepoCollector, errors.Error) {
+       var err errors.Error
+       var repo parser.RepoCollector
+       p := parser.NewGitRepoCreator(storage, logger)
+       if strings.HasPrefix(op.Url, "http") {
+               repo, err = p.CloneGoGitRepoOverHTTP(ctx, op.RepoId, op.Url, 
op.User, op.Password, op.Proxy)
+       } else if url := strings.TrimPrefix(op.Url, "ssh://"); 
strings.HasPrefix(url, "git@") {
+               repo, err = p.CloneGoGitRepoOverSSH(ctx, op.RepoId, url, 
op.PrivateKey, op.Passphrase)
+       } else if strings.HasPrefix(op.Url, "/") {
+               repo, err = p.LocalGoGitRepo(op.Url, op.RepoId)
+       } else {
+               return nil, errors.BadInput.New(fmt.Sprintf("unsupported url 
[%s]", op.Url))
+       }
+       return repo, err
+}
diff --git a/backend/plugins/gitextractor/tasks/git_repo_collector.go 
b/backend/plugins/gitextractor/tasks/git_repo_collector.go
index d5132fa6c..5af4fe98a 100644
--- a/backend/plugins/gitextractor/tasks/git_repo_collector.go
+++ b/backend/plugins/gitextractor/tasks/git_repo_collector.go
@@ -27,7 +27,7 @@ import (
 
 type GitExtractorTaskData struct {
        Options *GitExtractorOptions
-       GitRepo *parser.GitRepo
+       GitRepo parser.RepoCollector
 }
 
 type GitExtractorOptions struct {
@@ -39,6 +39,7 @@ type GitExtractorOptions struct {
        PrivateKey string `json:"privateKey"`
        Passphrase string `json:"passphrase"`
        Proxy      string `json:"proxy"`
+       UseGoGit   bool   `json:"use_go_git"`
 }
 
 func (o GitExtractorOptions) Valid() errors.Error {
@@ -60,11 +61,11 @@ func CollectGitCommits(subTaskCtx plugin.SubTaskContext) 
errors.Error {
        if count, err := repo.CountCommits(subTaskCtx.GetContext()); err != nil 
{
                subTaskCtx.GetLogger().Error(err, "unable to get commit count")
                subTaskCtx.SetProgress(0, -1)
-               return err
+               return errors.Convert(err)
        } else {
                subTaskCtx.SetProgress(0, count)
        }
-       return repo.CollectCommits(subTaskCtx)
+       return errors.Convert(repo.CollectCommits(subTaskCtx))
 }
 
 func CollectGitBranches(subTaskCtx plugin.SubTaskContext) errors.Error {
@@ -72,38 +73,38 @@ func CollectGitBranches(subTaskCtx plugin.SubTaskContext) 
errors.Error {
        if count, err := repo.CountBranches(subTaskCtx.GetContext()); err != 
nil {
                subTaskCtx.GetLogger().Error(err, "unable to get branch count")
                subTaskCtx.SetProgress(0, -1)
-               return err
+               return errors.Convert(err)
        } else {
                subTaskCtx.SetProgress(0, count)
        }
-       return repo.CollectBranches(subTaskCtx)
+       return errors.Convert(repo.CollectBranches(subTaskCtx))
 }
 
 func CollectGitTags(subTaskCtx plugin.SubTaskContext) errors.Error {
        repo := getGitRepo(subTaskCtx)
-       if count, err := repo.CountTags(); err != nil {
+       if count, err := repo.CountTags(subTaskCtx.GetContext()); err != nil {
                subTaskCtx.GetLogger().Error(err, "unable to get tag count")
                subTaskCtx.SetProgress(0, -1)
-               return err
+               return errors.Convert(err)
        } else {
                subTaskCtx.SetProgress(0, count)
        }
-       return repo.CollectTags(subTaskCtx)
+       return errors.Convert(repo.CollectTags(subTaskCtx))
 }
 
 func CollectGitDiffLines(subTaskCtx plugin.SubTaskContext) errors.Error {
        repo := getGitRepo(subTaskCtx)
-       if count, err := repo.CountTags(); err != nil {
+       if count, err := repo.CountTags(subTaskCtx.GetContext()); err != nil {
                subTaskCtx.GetLogger().Error(err, "unable to get line content")
                subTaskCtx.SetProgress(0, -1)
-               return err
+               return errors.Convert(err)
        } else {
                subTaskCtx.SetProgress(0, count)
        }
-       return repo.CollectDiffLine(subTaskCtx)
+       return errors.Convert(repo.CollectDiffLine(subTaskCtx))
 }
 
-func getGitRepo(subTaskCtx plugin.SubTaskContext) *parser.GitRepo {
+func getGitRepo(subTaskCtx plugin.SubTaskContext) parser.RepoCollector {
        taskData, ok := subTaskCtx.GetData().(*GitExtractorTaskData)
        if !ok {
                panic("git repo reference not found on context")
diff --git a/env.example b/env.example
index e615f98d7..aab1ff22a 100755
--- a/env.example
+++ b/env.example
@@ -64,6 +64,10 @@ ENCRYPTION_SECRET=
 IN_SECURE_SKIP_VERIFY=
 
 ##########################
+# In plugin gitextractor, use go-git to collect the repo's data
+##########################
+USE_GO_GIT_IN_GIT_EXTRACTOR=false
+
 # Set if response error when requesting /connections/{connection_id}/test 
should be wrapped or not
 ##########################
 WRAP_RESPONSE_ERROR=

Reply via email to