This is an automated email from the ASF dual-hosted git repository.
lynwee pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new 14d5dd1cf feat #6615, replace libgit2 with go-git. (#6701)
14d5dd1cf is described below
commit 14d5dd1cfc6a1c9398018622a3b3adc317ec11c7
Author: Lynwee <[email protected]>
AuthorDate: Tue Mar 5 11:42:36 2024 +0800
feat #6615, replace libgit2 with go-git. (#6701)
* feat(gitextractor): add "CountTags/CountBranches/CountCommits"WithGoGit
* feat(gitextractor): stash codes
* fix(gitextractor): refactor code to make it easier to handle
* refactor(gitextractor): add an interface for replacing libgit2 with go-git
* feat(gitextractor): stash current works
* feat(gitextractor): support collect repo data with go-git
* fix(mod): remove replacement
* fix(gitextractor): fix lint errors
* fix(gitextractor): works with lower golang version
* fix(gitextractor): fix lint errors
* fix(gitextractor): fix test errors
* fix(unittest): try to fix
* fix(gitextractor): fix test
* fix(gitextractor): fix test
* fix(gitextractor): update the config value of
'USE_GO_GIT_IN_GIT_EXTRACTOR'
* fix(gitextractor): remove hardcoding path
---
.gitignore | 5 +-
backend/core/models/domainlayer/domainlayer.go | 7 +
backend/go.mod | 14 +-
backend/go.sum | 15 +
backend/plugins/gitextractor/impl/impl.go | 2 +-
backend/plugins/gitextractor/main.go | 2 +-
backend/plugins/gitextractor/parser/clone.go | 59 ++-
.../gitextractor/parser/clone_gogit.go} | 22 +-
.../gitextractor/parser/clone_libgit2.go} | 22 +-
backend/plugins/gitextractor/parser/clone_test.go | 75 +--
backend/plugins/gitextractor/parser/repo.go | 578 +--------------------
.../plugins/gitextractor/parser/repo_creator.go | 24 +-
backend/plugins/gitextractor/parser/repo_gogit.go | 546 +++++++++++++++++++
.../parser/{repo.go => repo_libgit2.go} | 48 +-
backend/plugins/gitextractor/parser/repo_test.go | 302 +++++++++++
backend/plugins/gitextractor/store/csv.go | 6 +
backend/plugins/gitextractor/tasks/clone.go | 44 +-
.../gitextractor/tasks/git_repo_collector.go | 25 +-
env.example | 4 +
19 files changed, 1098 insertions(+), 702 deletions(-)
diff --git a/.gitignore b/.gitignore
index a9d2bbe8a..0aa085362 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,4 +159,7 @@ backend/server/api/docs/docs.go
# python
*.pyc
__pycache__
-venv
\ No newline at end of file
+venv
+
+/backend/plugins/gitextractor/parser/output*
+/backend/plugins/gitextractor/parser/demo_repo/
\ No newline at end of file
diff --git a/backend/core/models/domainlayer/domainlayer.go
b/backend/core/models/domainlayer/domainlayer.go
index 2e899173d..f11570d40 100644
--- a/backend/core/models/domainlayer/domainlayer.go
+++ b/backend/core/models/domainlayer/domainlayer.go
@@ -37,3 +37,10 @@ func NewDomainEntity(id string) DomainEntity {
NoPKModel: common.NewNoPKModel(),
}
}
+
+func NewDomainEntityExtended(id string) DomainEntityExtended {
+ return DomainEntityExtended{
+ Id: id,
+ NoPKModel: common.NewNoPKModel(),
+ }
+}
diff --git a/backend/go.mod b/backend/go.mod
index 8d0f833b8..ab9e5f250 100644
--- a/backend/go.mod
+++ b/backend/go.mod
@@ -7,7 +7,7 @@ require (
github.com/gin-contrib/cors v1.3.1
github.com/gin-gonic/gin v1.9.1
github.com/go-errors/errors v1.4.2 // indirect
- github.com/go-git/go-git/v5 v5.9.0
+ github.com/go-git/go-git/v5 v5.11.0
github.com/go-playground/validator/v10 v10.14.1
github.com/gocarina/gocsv v0.0.0-20220707092902-b9da1f06c77e
github.com/google/uuid v1.3.0
@@ -24,13 +24,13 @@ require (
github.com/spf13/cast v1.5.1
github.com/spf13/cobra v1.5.0
github.com/spf13/viper v1.8.1
- github.com/stretchr/testify v1.8.3
+ github.com/stretchr/testify v1.8.4
github.com/swaggo/files v1.0.1
github.com/swaggo/gin-swagger v1.6.0
github.com/swaggo/swag v1.16.1
github.com/tidwall/gjson v1.14.3
github.com/viant/afs v1.16.0
- golang.org/x/crypto v0.14.0
+ golang.org/x/crypto v0.16.0
golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f
golang.org/x/oauth2 v0.0.0-20210402161424-2e8d93401602
golang.org/x/sync v0.3.0
@@ -106,9 +106,9 @@ require (
github.com/ugorji/go/codec v1.2.11 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
golang.org/x/arch v0.3.0 // indirect
- golang.org/x/net v0.16.0 // indirect
- golang.org/x/sys v0.13.0 // indirect
- golang.org/x/text v0.13.0 // indirect
+ golang.org/x/net v0.19.0 // indirect
+ golang.org/x/sys v0.15.0 // indirect
+ golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.13.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.30.0 // indirect
@@ -123,5 +123,3 @@ require (
github.com/golang-jwt/jwt/v5 v5.0.0-rc.1
golang.org/x/mod v0.13.0
)
-
-//replace github.com/apache/incubator-devlake => ./
diff --git a/backend/go.sum b/backend/go.sum
index f8e34f61a..7b42dbbf9 100644
--- a/backend/go.sum
+++ b/backend/go.sum
@@ -171,8 +171,11 @@ github.com/go-git/gcfg
v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmS
github.com/go-git/go-billy/v5 v5.5.0
h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU=
github.com/go-git/go-billy/v5 v5.5.0/go.mod
h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow=
github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20230305113008-0c11038e723f
h1:Pz0DHeFij3XFhoBRGUDPzSJ+w2UcK5/0JvF8DRI58r8=
+github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399
h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4=
github.com/go-git/go-git/v5 v5.9.0
h1:cD9SFA7sHVRdJ7AYck1ZaAa/yeuBvGPxwXDL8cxrObY=
github.com/go-git/go-git/v5 v5.9.0/go.mod
h1:RKIqga24sWdMGZF+1Ekv9kylsDz6LzdTSI2s/OsZWE0=
+github.com/go-git/go-git/v5 v5.11.0
h1:XIZc1p+8YzypNr34itUfSvYJcv+eYdTnTvOZ2vD3cA4=
+github.com/go-git/go-git/v5 v5.11.0/go.mod
h1:6GFcX2P3NM7FPBfpePbpLd21XxsgdAt+lKqXmCUiUCY=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod
h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
@@ -272,6 +275,7 @@ github.com/google/go-cmp v0.5.3/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/
github.com/google/go-cmp v0.5.4/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-querystring v1.0.0/go.mod
h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/gofuzz v1.0.0/go.mod
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod
h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
@@ -621,6 +625,8 @@ github.com/stretchr/testify v1.8.1/go.mod
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o
github.com/stretchr/testify v1.8.2/go.mod
h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3
h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.8.4
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/subosito/gotenv v1.2.0
h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
github.com/subosito/gotenv v1.2.0/go.mod
h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
@@ -714,6 +720,8 @@ golang.org/x/crypto
v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2Uz
golang.org/x/crypto v0.7.0/go.mod
h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU=
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
golang.org/x/crypto v0.14.0/go.mod
h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY=
+golang.org/x/crypto v0.16.0/go.mod
h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod
h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod
h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -806,6 +814,8 @@ golang.org/x/net v0.7.0/go.mod
h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.16.0 h1:7eBu7KsSvFDtSXUIDbh3aqlK4DPsZ1rByC8PFfBThos=
golang.org/x/net v0.16.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
+golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -906,6 +916,8 @@ golang.org/x/sys v0.5.0/go.mod
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.15.0 h1:h48lPFYpsTvQJZF4EKyI4aLHaev3CxivZmv7yZig9pc=
+golang.org/x/sys v0.15.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod
h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod
h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -913,6 +925,7 @@ golang.org/x/term v0.2.0/go.mod
h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
+golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -927,6 +940,8 @@ golang.org/x/text v0.7.0/go.mod
h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod
h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod
h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
diff --git a/backend/plugins/gitextractor/impl/impl.go
b/backend/plugins/gitextractor/impl/impl.go
index 31eae69e4..d8d5413d4 100644
--- a/backend/plugins/gitextractor/impl/impl.go
+++ b/backend/plugins/gitextractor/impl/impl.go
@@ -74,7 +74,7 @@ func (p GitExtractor) PrepareTaskData(taskCtx
plugin.TaskContext, options map[st
func (p GitExtractor) Close(taskCtx plugin.TaskContext) errors.Error {
if taskData, ok := taskCtx.GetData().(*tasks.GitExtractorTaskData); ok {
if taskData.GitRepo != nil {
- if err := taskData.GitRepo.Close(); err != nil {
+ if err := taskData.GitRepo.Close(taskCtx.GetContext());
err != nil {
return errors.Convert(err)
}
}
diff --git a/backend/plugins/gitextractor/main.go
b/backend/plugins/gitextractor/main.go
index a62f1561d..f9cc30490 100644
--- a/backend/plugins/gitextractor/main.go
+++ b/backend/plugins/gitextractor/main.go
@@ -88,7 +88,7 @@ func main() {
if err != nil {
panic(err)
}
- defer repo.Close()
+ defer repo.Close(ctx)
if err = repo.CollectAll(subTaskCtx); err != nil {
panic(err)
}
diff --git a/backend/plugins/gitextractor/parser/clone.go
b/backend/plugins/gitextractor/parser/clone.go
index 085e7e3f3..f8739e731 100644
--- a/backend/plugins/gitextractor/parser/clone.go
+++ b/backend/plugins/gitextractor/parser/clone.go
@@ -27,7 +27,6 @@ import (
"github.com/apache/incubator-devlake/core/plugin"
"github.com/go-git/go-git/v5/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v5/plumbing/transport"
-
"net"
"net/http"
neturl "net/url"
@@ -75,8 +74,8 @@ func cloneOverSSH(ctx plugin.SubTaskContext, url, dir,
passphrase string, pk []b
return nil
}
-func (l *GitRepoCreator) CloneOverHTTP(ctx plugin.SubTaskContext, repoId, url,
user, password, proxy string) (*GitRepo, errors.Error) {
- return withTempDirectory(func(dir string) (*GitRepo, error) {
+func (l *GitRepoCreator) cloneOverHTTP(ctx plugin.SubTaskContext, withGoGit
bool, repoId, url, user, password, proxy string) (RepoCollector, errors.Error) {
+ return withTempDirectory(func(dir string) (RepoCollector, error) {
var data []byte
buf := bytes.NewBuffer(data)
done := make(chan struct{}, 1)
@@ -127,12 +126,15 @@ func (l *GitRepoCreator) CloneOverHTTP(ctx
plugin.SubTaskContext, repoId, url, u
l.logger.Error(err, "PlainCloneContext")
return nil, err
}
+ if withGoGit {
+ return l.LocalGoGitRepo(dir, repoId)
+ }
return l.LocalRepo(dir, repoId)
})
}
-func (l *GitRepoCreator) CloneOverSSH(ctx plugin.SubTaskContext, repoId, url,
privateKey, passphrase string) (*GitRepo, errors.Error) {
- return withTempDirectory(func(dir string) (*GitRepo, error) {
+func (l *GitRepoCreator) cloneOverSSH(ctx plugin.SubTaskContext, withGoGit
bool, repoId, url, privateKey, passphrase string) (RepoCollector, errors.Error)
{
+ return withTempDirectory(func(dir string) (RepoCollector, error) {
pk, err := base64.StdEncoding.DecodeString(privateKey)
if err != nil {
return nil, err
@@ -141,31 +143,13 @@ func (l *GitRepoCreator) CloneOverSSH(ctx
plugin.SubTaskContext, repoId, url, pr
if err != nil {
return nil, err
}
+ if withGoGit {
+ return l.LocalGoGitRepo(dir, repoId)
+ }
return l.LocalRepo(dir, repoId)
})
}
-func withTempDirectory(f func(tempDir string) (*GitRepo, error)) (*GitRepo,
errors.Error) {
- dir, err := os.MkdirTemp("", "gitextractor")
- if err != nil {
- return nil, errors.Convert(err)
- }
- cleanup := func() {
- _ = os.RemoveAll(dir)
- }
- defer func() {
- if err != nil {
- cleanup()
- }
- }()
- repo, err := f(dir)
- if err != nil {
- return nil, errors.Convert(err)
- }
- repo.cleanup = cleanup
- return repo, errors.Convert(err)
-}
-
func setCloneProgress(subTaskCtx plugin.SubTaskContext, cloneProgressInfo
string) {
if cloneProgressInfo == "" {
return
@@ -208,3 +192,26 @@ func refreshCloneProgress(subTaskCtx
plugin.SubTaskContext, done chan struct{},
func isAzureRepo(ctx context.Context, repoUrl string) bool {
return strings.Contains(repoUrl, "dev.azure.com")
}
+
+func withTempDirectory(f func(tempDir string) (RepoCollector, error))
(RepoCollector, errors.Error) {
+ dir, err := os.MkdirTemp("", "gitextractor")
+ if err != nil {
+ return nil, errors.Convert(err)
+ }
+ cleanup := func() {
+ _ = os.RemoveAll(dir)
+ }
+ defer func() {
+ if err != nil {
+ cleanup()
+ }
+ }()
+ repo, err := f(dir)
+ if err != nil {
+ return nil, errors.Convert(err)
+ }
+ if err := repo.SetCleanUp(cleanup); err != nil {
+ return nil, errors.Convert(err)
+ }
+ return repo, nil
+}
diff --git a/backend/core/models/domainlayer/domainlayer.go
b/backend/plugins/gitextractor/parser/clone_gogit.go
similarity index 54%
copy from backend/core/models/domainlayer/domainlayer.go
copy to backend/plugins/gitextractor/parser/clone_gogit.go
index 2e899173d..93b392d03 100644
--- a/backend/core/models/domainlayer/domainlayer.go
+++ b/backend/plugins/gitextractor/parser/clone_gogit.go
@@ -15,25 +15,17 @@ See the License for the specific language governing
permissions and
limitations under the License.
*/
-package domainlayer
+package parser
import (
- "github.com/apache/incubator-devlake/core/models/common"
+ "github.com/apache/incubator-devlake/core/errors"
+ "github.com/apache/incubator-devlake/core/plugin"
)
-type DomainEntity struct {
- Id string `json:"id" gorm:"primaryKey;type:varchar(255);comment:This
key is generated based on details from the original plugin"` // format:
<Plugin>:<Entity>:<PK0>:<PK1>
- common.NoPKModel
+func (l *GitRepoCreator) CloneGoGitRepoOverHTTP(ctx plugin.SubTaskContext,
repoId, url, user, password, proxy string) (RepoCollector, errors.Error) {
+ return l.cloneOverHTTP(ctx, true, repoId, url, user, password, proxy)
}
-type DomainEntityExtended struct {
- Id string `json:"id" gorm:"primaryKey;type:varchar(500);comment:This
key is generated based on details from the original plugin"` // format:
<Plugin>:<Entity>:<PK0>:<PK1>
- common.NoPKModel
-}
-
-func NewDomainEntity(id string) DomainEntity {
- return DomainEntity{
- Id: id,
- NoPKModel: common.NewNoPKModel(),
- }
+func (l *GitRepoCreator) CloneGoGitRepoOverSSH(ctx plugin.SubTaskContext,
repoId, url, privateKey, passphrase string) (RepoCollector, errors.Error) {
+ return l.cloneOverSSH(ctx, true, repoId, url, privateKey, passphrase)
}
diff --git a/backend/core/models/domainlayer/domainlayer.go
b/backend/plugins/gitextractor/parser/clone_libgit2.go
similarity index 54%
copy from backend/core/models/domainlayer/domainlayer.go
copy to backend/plugins/gitextractor/parser/clone_libgit2.go
index 2e899173d..20eafd282 100644
--- a/backend/core/models/domainlayer/domainlayer.go
+++ b/backend/plugins/gitextractor/parser/clone_libgit2.go
@@ -15,25 +15,17 @@ See the License for the specific language governing
permissions and
limitations under the License.
*/
-package domainlayer
+package parser
import (
- "github.com/apache/incubator-devlake/core/models/common"
+ "github.com/apache/incubator-devlake/core/errors"
+ "github.com/apache/incubator-devlake/core/plugin"
)
-type DomainEntity struct {
- Id string `json:"id" gorm:"primaryKey;type:varchar(255);comment:This
key is generated based on details from the original plugin"` // format:
<Plugin>:<Entity>:<PK0>:<PK1>
- common.NoPKModel
+func (l *GitRepoCreator) CloneOverHTTP(ctx plugin.SubTaskContext, repoId, url,
user, password, proxy string) (RepoCollector, errors.Error) {
+ return l.cloneOverHTTP(ctx, false, repoId, url, user, password, proxy)
}
-type DomainEntityExtended struct {
- Id string `json:"id" gorm:"primaryKey;type:varchar(500);comment:This
key is generated based on details from the original plugin"` // format:
<Plugin>:<Entity>:<PK0>:<PK1>
- common.NoPKModel
-}
-
-func NewDomainEntity(id string) DomainEntity {
- return DomainEntity{
- Id: id,
- NoPKModel: common.NewNoPKModel(),
- }
+func (l *GitRepoCreator) CloneOverSSH(ctx plugin.SubTaskContext, repoId, url,
privateKey, passphrase string) (RepoCollector, errors.Error) {
+ return l.cloneOverSSH(ctx, false, repoId, url, privateKey, passphrase)
}
diff --git a/backend/plugins/gitextractor/parser/clone_test.go
b/backend/plugins/gitextractor/parser/clone_test.go
index 562998f39..3e63e2af6 100644
--- a/backend/plugins/gitextractor/parser/clone_test.go
+++ b/backend/plugins/gitextractor/parser/clone_test.go
@@ -19,12 +19,14 @@ package parser
import (
gocontext "context"
- "fmt"
"github.com/apache/incubator-devlake/core/config"
"github.com/apache/incubator-devlake/core/context"
"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/log"
"github.com/apache/incubator-devlake/core/plugin"
+ "github.com/apache/incubator-devlake/impls/dalgorm"
+ "gorm.io/driver/mysql"
+ "gorm.io/gorm"
"testing"
)
@@ -40,7 +42,7 @@ func Test_setCloneProgress(t *testing.T) {
{
name: "test-0",
args: args{
- subTaskCtx: testSubTaskContext{},
+ subTaskCtx: &testSubTaskContext{},
cloneProgressInfo: `
Enumerating objects: 103, done.
Counting objects: 100% (103/103), done.
@@ -51,7 +53,7 @@ func Test_setCloneProgress(t *testing.T) {
{
name: "test-1",
args: args{
- subTaskCtx: testSubTaskContext{},
+ subTaskCtx: &testSubTaskContext{},
cloneProgressInfo: `
Enumerating objects: 103, done.
Counting objects: 100% (103/103), done.
@@ -66,64 +68,73 @@ func Test_setCloneProgress(t *testing.T) {
}
}
-type testSubTaskContext struct{}
+type testSubTaskContext struct {
+ current int
+ total int
+ Name string
+}
-func (testSubTaskContext) GetConfigReader() config.ConfigReader {
- //TODO implement me
- panic("implement me")
+func (ctx *testSubTaskContext) GetConfigReader() config.ConfigReader {
+ cfg := config.GetConfig()
+ return cfg
}
-func (testSubTaskContext) GetConfig(name string) string {
- //TODO implement me
- panic("implement me")
+func (ctx *testSubTaskContext) GetConfig(name string) string {
+ return config.GetConfig().GetString(name)
}
-func (testSubTaskContext) GetLogger() log.Logger {
- //TODO implement me
- panic("implement me")
+func (ctx *testSubTaskContext) GetLogger() log.Logger {
+ return logger
}
-func (testSubTaskContext) NestedLogger(name string) context.BasicRes {
+func (ctx *testSubTaskContext) NestedLogger(name string) context.BasicRes {
//TODO implement me
panic("implement me")
}
-func (testSubTaskContext) ReplaceLogger(logger log.Logger) context.BasicRes {
+func (ctx *testSubTaskContext) ReplaceLogger(logger log.Logger)
context.BasicRes {
//TODO implement me
panic("implement me")
}
-func (testSubTaskContext) GetDal() dal.Dal {
- //TODO implement me
- panic("implement me")
+func (ctx *testSubTaskContext) GetDal() dal.Dal {
+ //dsn :=
"mysql://root:[email protected]:3306/lake?charset=utf8mb4&parseTime=True&loc=UTC"
+ if runInLocal {
+ dsn :=
"merico:merico@tcp(127.0.0.1:3306)/lake?charset=utf8mb4&parseTime=True&loc=Local"
+ db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
+ if err != nil {
+ panic(err)
+ }
+ return dalgorm.NewDalgorm(db)
+ } else {
+ panic("implement me")
+ }
}
-func (testSubTaskContext) GetName() string {
- //TODO implement me
- panic("implement me")
+func (ctx *testSubTaskContext) GetName() string {
+ return ctx.Name
}
-func (testSubTaskContext) GetContext() gocontext.Context {
- //TODO implement me
- panic("implement me")
+func (ctx *testSubTaskContext) GetContext() gocontext.Context {
+ return gocontext.Background()
}
-func (testSubTaskContext) GetData() interface{} {
+func (ctx *testSubTaskContext) GetData() interface{} {
//TODO implement me
panic("implement me")
}
-func (testSubTaskContext) SetProgress(current int, total int) {
- //TODO implement me
- fmt.Printf("set current: %d, total: %d\n", current, total)
+func (ctx *testSubTaskContext) SetProgress(current int, total int) {
+ ctx.current = current
+ ctx.total = total
}
-func (testSubTaskContext) IncProgress(quantity int) {
- //TODO implement me
- panic("implement me")
+func (ctx *testSubTaskContext) IncProgress(quantity int) {
+ ctx.current += quantity
+ ctx.total += quantity
}
-func (testSubTaskContext) TaskContext() plugin.TaskContext {
+func (ctx *testSubTaskContext) TaskContext() plugin.TaskContext {
//TODO implement me
panic("implement me")
}
diff --git a/backend/plugins/gitextractor/parser/repo.go
b/backend/plugins/gitextractor/parser/repo.go
index 1a3a90d22..0077f2b52 100644
--- a/backend/plugins/gitextractor/parser/repo.go
+++ b/backend/plugins/gitextractor/parser/repo.go
@@ -19,577 +19,21 @@ package parser
import (
"context"
- "crypto/sha256"
- "encoding/hex"
- "fmt"
- "regexp"
- "sort"
- "strconv"
-
- "github.com/apache/incubator-devlake/core/config"
- "github.com/apache/incubator-devlake/core/dal"
- "github.com/apache/incubator-devlake/core/errors"
- "github.com/apache/incubator-devlake/core/log"
- "github.com/apache/incubator-devlake/core/models/domainlayer"
- "github.com/apache/incubator-devlake/core/models/domainlayer/code"
"github.com/apache/incubator-devlake/core/plugin"
- "github.com/apache/incubator-devlake/plugins/gitextractor/models"
-
- git "github.com/libgit2/git2go/v33"
)
-const SkipCommitFiles = "SKIP_COMMIT_FILES"
-
-var TypeNotMatchError = "the requested type does not match the type in the ODB"
-
-type GitRepo struct {
- store models.Store
- logger log.Logger
- id string
- repo *git.Repository
- cleanup func()
-}
-
-// CollectAll The main parser subtask
-func (r *GitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) errors.Error {
- subtaskCtx.SetProgress(0, -1)
- err := r.CollectTags(subtaskCtx)
- if err != nil {
- return err
- }
- err = r.CollectBranches(subtaskCtx)
- if err != nil {
- return err
- }
- err = r.CollectCommits(subtaskCtx)
- if err != nil {
- return err
- }
- return r.CollectDiffLine(subtaskCtx)
-}
-
-// Close resources
-func (r *GitRepo) Close() errors.Error {
- defer func() {
- if r.cleanup != nil {
- r.cleanup()
- }
- }()
- return r.store.Close()
-}
-
-// CountTags Count git tags subtask
-func (r *GitRepo) CountTags() (int, errors.Error) {
- tags, err := r.repo.Tags.List()
- if err != nil {
- return 0, errors.Convert(err)
- }
- return len(tags), nil
-}
-
-// CountBranches count the number of branches in a git repo
-func (r *GitRepo) CountBranches(ctx context.Context) (int, errors.Error) {
- var branchIter *git.BranchIterator
- branchIter, err := r.repo.NewBranchIterator(git.BranchAll)
- if err != nil {
- return 0, errors.Convert(err)
- }
- count := 0
- err = branchIter.ForEach(func(branch *git.Branch, branchType
git.BranchType) error {
- select {
- case <-ctx.Done():
- return ctx.Err()
- default:
- }
- if branch.IsBranch() || branch.IsRemote() {
- isHead, _ := branch.IsHead()
- if !isHead {
- count++
- }
- }
- return nil
- })
- return count, errors.Convert(err)
-}
-
-// CountCommits count the number of commits in a git repo
-func (r *GitRepo) CountCommits(ctx context.Context) (int, errors.Error) {
- odb, err := r.repo.Odb()
- if err != nil {
- return 0, errors.Convert(err)
- }
- count := 0
- err = odb.ForEach(func(id *git.Oid) error {
- select {
- case <-ctx.Done():
- return ctx.Err()
- default:
- }
- commit, e := r.repo.LookupCommit(id)
- if e != nil && e.Error() != TypeNotMatchError {
- return errors.Convert(e)
- }
- if commit != nil {
- count++
- }
- return nil
- })
- return count, errors.Convert(err)
-}
-
-// CollectTags Collect Tags data
-func (r *GitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) errors.Error {
- return errors.Convert(r.repo.Tags.Foreach(func(name string, id
*git.Oid) error {
- select {
- case <-subtaskCtx.GetContext().Done():
- return subtaskCtx.GetContext().Err()
- default:
- }
- var err1 error
- var tag *git.Tag
- var tagCommit string
- tag, err1 = r.repo.LookupTag(id)
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return errors.Convert(err1)
- }
- if tag != nil {
- tagCommit = tag.TargetId().String()
- } else {
- tagCommit = id.String()
- }
- r.logger.Info("tagCommit:%s", tagCommit)
- if tagCommit != "" {
- ref := &code.Ref{
- DomainEntityExtended:
domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
- RepoId: r.id,
- Name: name,
- CommitSha: tagCommit,
- RefType: TAG,
- }
- err1 = r.store.Refs(ref)
- if err1 != nil {
- return err1
- }
- subtaskCtx.IncProgress(1)
- }
- return nil
- }))
-}
-
-// CollectBranches Collect branch data
-func (r *GitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext)
errors.Error {
- var repoInter *git.BranchIterator
- repoInter, err := r.repo.NewBranchIterator(git.BranchAll)
- if err != nil {
- return errors.Convert(err)
- }
- return errors.Convert(repoInter.ForEach(func(branch *git.Branch,
branchType git.BranchType) error {
- select {
- case <-subtaskCtx.GetContext().Done():
- return subtaskCtx.GetContext().Err()
- default:
- }
- if branch.IsBranch() || branch.IsRemote() {
- name, err1 := branch.Name()
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return err1
- }
- var sha string
- if oid := branch.Target(); oid != nil {
- sha = oid.String()
- }
- ref := &code.Ref{
- DomainEntityExtended:
domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
- RepoId: r.id,
- Name: name,
- CommitSha: sha,
- RefType: BRANCH,
- }
- ref.IsDefault, err1 = branch.IsHead()
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return err1
- }
- err1 = r.store.Refs(ref)
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return err1
- }
- subtaskCtx.IncProgress(1)
- return nil
- }
- return nil
- }))
-}
-
-// CollectCommits Collect data from each commit, we can also get the diff line
-func (r *GitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext)
errors.Error {
- opts, err := getDiffOpts()
- if err != nil {
- return err
- }
- db := subtaskCtx.GetDal()
- components := make([]code.Component, 0)
- err = db.All(&components, dal.From(components), dal.Where("repo_id= ?",
r.id))
- if err != nil {
- return err
- }
- componentMap := make(map[string]*regexp.Regexp)
- for _, component := range components {
- componentMap[component.Name] =
regexp.MustCompile(component.PathRegex)
- }
- odb, err := errors.Convert01(r.repo.Odb())
- if err != nil {
- return err
- }
- return errors.Convert(odb.ForEach(func(id *git.Oid) error {
- select {
- case <-subtaskCtx.GetContext().Done():
- return subtaskCtx.GetContext().Err()
- default:
- }
- commit, err1 := r.repo.LookupCommit(id)
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return errors.Convert(err1)
- }
- if commit == nil {
- return nil
- }
- commitSha := commit.Id().String()
- r.logger.Debug("process commit: %s", commitSha)
- c := &code.Commit{
- Sha: commitSha,
- Message: commit.Message(),
- }
- author := commit.Author()
- if author != nil {
- c.AuthorName = author.Name
- c.AuthorEmail = author.Email
- c.AuthorId = author.Email
- c.AuthoredDate = author.When
- }
- committer := commit.Committer()
- if committer != nil {
- c.CommitterName = committer.Name
- c.CommitterEmail = committer.Email
- c.CommitterId = committer.Email
- c.CommittedDate = committer.When
- }
- err = r.storeParentCommits(commitSha, commit)
- if err != nil {
- return err
- }
- var parent *git.Commit
- if commit.ParentCount() > 0 {
- parent = commit.Parent(0)
- }
- var stats *git.DiffStats
- if stats, err = r.getDiffComparedToParent(c.Sha, commit,
parent, opts, componentMap); err != nil {
- return err
- }
- c.Additions += stats.Insertions()
- c.Deletions += stats.Deletions()
- err = r.store.Commits(c)
- if err != nil {
- return err
- }
- repoCommit := &code.RepoCommit{
- RepoId: r.id,
- CommitSha: c.Sha,
- }
- err = r.store.RepoCommits(repoCommit)
- if err != nil {
- return err
- }
- subtaskCtx.IncProgress(1)
- return nil
- }))
-}
-
-func (r *GitRepo) storeParentCommits(commitSha string, commit *git.Commit)
errors.Error {
- var commitParents []*code.CommitParent
- for i := uint(0); i < commit.ParentCount(); i++ {
- parent := commit.Parent(i)
- if parent != nil {
- if parentId := parent.Id(); parentId != nil {
- commitParents = append(commitParents,
&code.CommitParent{
- CommitSha: commitSha,
- ParentCommitSha: parentId.String(),
- })
- }
- }
- }
- return r.store.CommitParents(commitParents)
-}
-
-func (r *GitRepo) getDiffComparedToParent(commitSha string, commit
*git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap
map[string]*regexp.Regexp) (*git.DiffStats, errors.Error) {
- var err error
- var parentTree, tree *git.Tree
- if parent != nil {
- parentTree, err = parent.Tree()
- }
- if err != nil {
- return nil, errors.Convert(err)
- }
- tree, err = commit.Tree()
- if err != nil {
- return nil, errors.Convert(err)
- }
- var diff *git.Diff
- diff, err = r.repo.DiffTreeToTree(parentTree, tree, opts)
- if err != nil {
- return nil, errors.Convert(err)
- }
- cfg := config.GetConfig()
- skipCommitFiles := cfg.GetBool(SkipCommitFiles)
- if !skipCommitFiles {
- err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap)
- if err != nil {
- return nil, errors.Convert(err)
- }
- }
- var stats *git.DiffStats
- stats, err = diff.Stats()
- if err != nil {
- return nil, errors.Convert(err)
- }
- return stats, nil
-}
-
-func (r *GitRepo) storeCommitFilesFromDiff(commitSha string, diff *git.Diff,
componentMap map[string]*regexp.Regexp) errors.Error {
- var commitFile *code.CommitFile
- var commitFileComponent *code.CommitFileComponent
- var err error
- err = diff.ForEach(func(file git.DiffDelta, progress float64) (
- git.DiffForEachHunkCallback, error) {
- if commitFile != nil {
- err = r.store.CommitFiles(commitFile)
- if err != nil {
- r.logger.Error(err, "CommitFiles error")
- return nil, err
- }
- }
+type RepoCollector interface {
+ SetCleanUp(func()) error
+ Close(ctx context.Context) error
- commitFile = new(code.CommitFile)
- commitFile.CommitSha = commitSha
- commitFile.FilePath = file.NewFile.Path
+ CollectAll(subtaskCtx plugin.SubTaskContext) error
- // With some long path,the varchar(255) was not enough both ID
and file_path
- // So we use the hash to compress the path in ID and add length
of file_path.
- // Use commitSha and the sha256 of FilePath to create id
- shaFilePath := sha256.New()
- shaFilePath.Write([]byte(file.NewFile.Path))
- commitFile.Id = commitSha + ":" +
hex.EncodeToString(shaFilePath.Sum(nil))
-
- commitFileComponent = new(code.CommitFileComponent)
- for component, reg := range componentMap {
- if reg.MatchString(commitFile.FilePath) {
- commitFileComponent.ComponentName = component
- break
- }
- }
- commitFileComponent.CommitFileId = commitFile.Id
- if commitFileComponent.ComponentName == "" {
- commitFileComponent.ComponentName = "Default"
- }
- return func(hunk git.DiffHunk) (git.DiffForEachLineCallback,
error) {
- return func(line git.DiffLine) error {
- if line.Origin == git.DiffLineAddition {
- commitFile.Additions += line.NumLines
- }
- if line.Origin == git.DiffLineDeletion {
- commitFile.Deletions += line.NumLines
- }
- return nil
- }, nil
- }, nil
- }, git.DiffDetailLines)
- if commitFileComponent != nil {
- err = r.store.CommitFileComponents(commitFileComponent)
- if err != nil {
- r.logger.Error(err, "CommitFileComponents error")
- }
- }
- if commitFile != nil {
- err = r.store.CommitFiles(commitFile)
- if err != nil {
- r.logger.Error(err, "CommitFiles error")
- }
- }
- return errors.Convert(err)
-}
-
-// CollectDiffLine get line diff data from a specific branch
-func (r *GitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext)
errors.Error {
- //Using this subtask,we can get every line change in every commit.
- //We maintain a snapshot structure to get which commit each deleted
line belongs to
- snapshot := make(map[string] /*file path*/ *models.FileBlame)
- repo := r.repo
- //step 1. get the reverse commit list
- commitList := make([]git.Commit, 0)
- //get currently head commitsha, dafault is master branch
- // check branch, if not master, checkout to branch's head
- commitOid, err1 := repo.Head()
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return errors.Convert(err1)
- }
- //get head commit object and add into commitList
- commit, err1 := repo.LookupCommit(commitOid.Target())
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return errors.Convert(err1)
- }
- commitList = append(commitList, *commit)
- // if current head has parents, get parent commitsha
- for commit != nil && commit.ParentCount() > 0 {
- pid := commit.ParentId(0)
- commit, err1 = repo.LookupCommit(pid)
- if err1 != nil && err1.Error() != TypeNotMatchError {
- return errors.Convert(err1)
- }
- commitList = append(commitList, *commit)
- }
- // reverse commitList
- for i, j := 0, len(commitList)-1; i < j; i, j = i+1, j-1 {
- commitList[i], commitList[j] = commitList[j], commitList[i]
- }
- //step 2. get the diff of each commit
- // for each commit, get the diff
- for _, commitsha := range commitList {
- curcommit, err := repo.LookupCommit(commitsha.Id())
- if err != nil {
- return errors.Convert(err)
- }
- if curcommit.ParentCount() == 0 || curcommit.ParentCount() > 0 {
- var parentTree, tree *git.Tree
- tree, err = curcommit.Tree()
- if err != nil {
- return errors.Convert(err)
- }
- var diff *git.Diff
- //FIXME error type convert
- opts, err := git.DefaultDiffOptions()
- opts.NotifyCallback = func(diffSoFar *git.Diff, delta
git.DiffDelta, matchedPathSpec string) error {
- return nil
- }
- if err != nil {
- return errors.Convert(err)
- }
- if curcommit.ParentCount() > 0 {
- parent := curcommit.Parent(0)
- parentTree, err = parent.Tree()
- }
- diff, err = repo.DiffTreeToTree(parentTree, tree, &opts)
- if err != nil {
- return errors.Convert(err)
- }
- deleted := make(models.DiffLines, 0)
- added := make(models.DiffLines, 0)
- var lastFile string
- lastFile = ""
- err = diff.ForEach(func(file git.DiffDelta, progress
float64) (git.DiffForEachHunkCallback, error) {
- //if doesn't exist in snapshot, create a new one
- if _, ok := snapshot[file.OldFile.Path]; !ok {
- fileBlame, err := models.NewFileBlame()
- if err != nil {
- r.logger.Info("Create FileBlame
Error")
- return nil, err
- }
- snapshot[file.OldFile.Path] =
(*models.FileBlame)(fileBlame)
- }
- if lastFile == "" {
- lastFile = file.NewFile.Path
- } else if lastFile != file.NewFile.Path {
- updateSnapshotFileBlame(curcommit,
deleted, added, lastFile, snapshot)
- //reset the deleted and added,last_file
now is current file
- deleted = make([]git.DiffLine, 0)
- added = make([]git.DiffLine, 0)
- lastFile = file.NewFile.Path
- }
- hunkNum := 0
- return func(hunk git.DiffHunk)
(git.DiffForEachLineCallback, error) {
- hunkNum++
- return func(line git.DiffLine) error {
- commitLineChange :=
&code.CommitLineChange{}
- commitLineChange.CommitSha =
curcommit.Id().String()
- commitLineChange.ChangedType =
line.Origin.String()
- commitLineChange.LineNoNew =
line.NewLineno
- commitLineChange.LineNoOld =
line.OldLineno
- commitLineChange.OldFilePath =
file.OldFile.Path
- commitLineChange.NewFilePath =
file.NewFile.Path
- commitLineChange.HunkNum =
hunkNum
- commitLineChange.Id =
curcommit.Id().String() + ":" + file.NewFile.Path + ":" +
strconv.Itoa(line.OldLineno) + ":" + strconv.Itoa(line.NewLineno)
- if line.Origin ==
git.DiffLineAddition {
- added = append(added,
line)
- } else if line.Origin ==
git.DiffLineDeletion {
- fb :=
snapshot[file.OldFile.Path]
- l :=
fb.Find(line.OldLineno)
- if l != nil && l.Value
!= nil {
- temp :=
snapshot[file.OldFile.Path].Find(line.OldLineno)
-
commitLineChange.PrevCommit = temp.Value.(string)
- } else {
-
r.logger.Info("err", file.OldFile.Path, line.OldLineno, curcommit.Id().String())
- }
- deleted =
append(deleted, line)
- }
- err =
r.store.CommitLineChange(commitLineChange)
- if err != nil {
- return
errors.Convert(err)
- }
- return nil
- }, nil
- }, nil
- }, git.DiffDetailLines)
- if err != nil {
- return errors.Convert(err)
- }
- //finally,process the last file in diff
- updateSnapshotFileBlame(curcommit, deleted, added,
lastFile, snapshot)
- }
- }
- r.logger.Info("line change collect success")
- db := subtaskCtx.GetDal()
- err := db.Delete(&code.RepoSnapshot{}, dal.Where("repo_id= ?", r.id))
- if err != nil {
- return errors.Convert(err)
- }
- for fp := range snapshot {
- temp := snapshot[fp]
- count := 0
- for e := temp.Lines.Front(); e != nil; e = e.Next() {
- count++
- snapshotLine := &code.RepoSnapshot{}
- snapshotLine.RepoId = r.id
- snapshotLine.LineNo = count
- snapshotLine.CommitSha = e.Value.(string)
- snapshotLine.FilePath = fp
- err := r.store.RepoSnapshot(snapshotLine)
- if err != nil {
- r.logger.Info("error")
- return err
- }
- }
-
- }
-
- r.logger.Info("collect snapshot finished")
- return nil
-}
-
-func updateSnapshotFileBlame(currentCommit *git.Commit, deleted
models.DiffLines, added models.DiffLines, lastFile string, snapshot
map[string]*models.FileBlame) {
- sort.Sort(deleted)
- for _, line := range deleted {
- snapshot[lastFile].RemoveLine(line.OldLineno)
- }
- for _, line := range added {
- snapshot[lastFile].AddLine(line.NewLineno,
currentCommit.Id().String())
- }
-}
+ CountTags(ctx context.Context) (int, error)
+ CountBranches(ctx context.Context) (int, error)
+ CountCommits(ctx context.Context) (int, error)
-func getDiffOpts() (*git.DiffOptions, errors.Error) {
- opts, err := git.DefaultDiffOptions()
- if err != nil {
- return nil, errors.Convert(err)
- }
- opts.NotifyCallback = func(diffSoFar *git.Diff, delta git.DiffDelta,
matchedPathSpec string) error {
- return nil
- }
- return &opts, nil
+ CollectTags(subtaskCtx plugin.SubTaskContext) error
+ CollectBranches(subtaskCtx plugin.SubTaskContext) error
+ CollectCommits(subtaskCtx plugin.SubTaskContext) error
+ CollectDiffLine(subtaskCtx plugin.SubTaskContext) error
}
diff --git a/backend/plugins/gitextractor/parser/repo_creator.go
b/backend/plugins/gitextractor/parser/repo_creator.go
index 0d6ec2e4a..8fef793d8 100644
--- a/backend/plugins/gitextractor/parser/repo_creator.go
+++ b/backend/plugins/gitextractor/parser/repo_creator.go
@@ -21,12 +21,14 @@ import (
"github.com/apache/incubator-devlake/core/errors"
"github.com/apache/incubator-devlake/core/log"
"github.com/apache/incubator-devlake/plugins/gitextractor/models"
+ gogit "github.com/go-git/go-git/v5"
git "github.com/libgit2/git2go/v33"
)
const (
- BRANCH = "BRANCH"
- TAG = "TAG"
+ BRANCH = "BRANCH"
+ TAG = "TAG"
+ EnableGoGit = true
)
type GitRepoCreator struct {
@@ -59,3 +61,21 @@ func (l *GitRepoCreator) newGitRepo(repoId string, repo
*git.Repository) *GitRep
repo: repo,
}
}
+
+// LocalGoGitRepo opens the repository located at repoPath with go-git's
+// PlainOpen and wraps it in a GoGitRepo identified by repoId.
+// An error from PlainOpen is passed through errors.Convert and returned.
+func (l *GitRepoCreator) LocalGoGitRepo(repoPath, repoId string) (*GoGitRepo,
errors.Error) {
+ goGitRepo, err := gogit.PlainOpen(repoPath)
+ if err != nil {
+ return nil, errors.Convert(err)
+ }
+ return l.newGoGitRepo(repoId, goGitRepo), nil
+}
+
+// newGoGitRepo builds a GoGitRepo around an already opened go-git repository,
+// reusing the creator's logger and store.
+func (l *GitRepoCreator) newGoGitRepo(repoId string, goGitRespo
*gogit.Repository) *GoGitRepo {
+ return &GoGitRepo{
+ logger: l.logger,
+ id: repoId,
+ repo: goGitRespo,
+ store: l.store,
+ }
+}
diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go
b/backend/plugins/gitextractor/parser/repo_gogit.go
new file mode 100644
index 000000000..82be9b365
--- /dev/null
+++ b/backend/plugins/gitextractor/parser/repo_gogit.go
@@ -0,0 +1,546 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package parser
+
+import (
+ "context"
+ "crypto/sha256"
+ "encoding/hex"
+ "fmt"
+ "github.com/apache/incubator-devlake/core/dal"
+ "github.com/apache/incubator-devlake/core/errors"
+ "github.com/apache/incubator-devlake/core/log"
+ "github.com/apache/incubator-devlake/core/models/domainlayer"
+ "github.com/apache/incubator-devlake/core/models/domainlayer/code"
+ "github.com/apache/incubator-devlake/core/plugin"
+ "github.com/apache/incubator-devlake/plugins/gitextractor/models"
+ gogit "github.com/go-git/go-git/v5"
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/object"
+ "github.com/go-git/go-git/v5/plumbing/storer"
+ "regexp"
+)
+
+// GoGitRepo collects repository data through go-git and writes it to a
+// models.Store.
+type GoGitRepo struct {
+ id string // written as RepoId into the collected records
+ logger log.Logger
+ store models.Store // destination of all collected records
+ repo *gogit.Repository
+ cleanUp func() // optional hook run by Close; set with SetCleanUp
+}
+
+// SetCleanUp registers f as the hook that Close runs.
+// A nil f is ignored, so a previously registered hook is kept.
+// It always returns nil.
+func (r *GoGitRepo) SetCleanUp(f func()) error {
+ if f != nil {
+ r.cleanUp = f
+ }
+ return nil
+}
+
+// Close closes the underlying store and runs the clean-up hook registered
+// with SetCleanUp, if any.
+//
+// The hook is deferred so that it also runs when closing the store fails;
+// otherwise whatever the hook is responsible for releasing would be leaked
+// on that error path. This mirrors GitRepo.Close, which defers its cleanup
+// as well. The error returned by the store, if any, is returned unchanged.
+// ctx is currently unused.
+func (r *GoGitRepo) Close(ctx context.Context) error {
+	defer func() {
+		if r.cleanUp != nil {
+			r.cleanUp()
+		}
+	}()
+	if err := r.store.Close(); err != nil {
+		return err
+	}
+	return nil
+}
+
+// CollectAll is the main parser subtask. It resets the progress with
+// SetProgress(0, -1) and then runs CollectTags, CollectBranches,
+// CollectCommits and CollectDiffLine in that order, returning the first
+// error encountered.
+func (r *GoGitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) error {
+ subtaskCtx.SetProgress(0, -1)
+ err := r.CollectTags(subtaskCtx)
+ if err != nil {
+ return err
+ }
+ err = r.CollectBranches(subtaskCtx)
+ if err != nil {
+ return err
+ }
+ err = r.CollectCommits(subtaskCtx)
+ if err != nil {
+ return err
+ }
+ return r.CollectDiffLine(subtaskCtx)
+}
+
+// CountTags returns the number of references yielded by the repository's tag
+// iterator. Iteration is aborted with ctx.Err() once ctx is done; on any
+// error the returned count is 0.
+func (r *GoGitRepo) CountTags(ctx context.Context) (int, error) {
+ iter, err := r.repo.Tags()
+ if err != nil {
+ return 0, err
+ }
+ var tagsCount int
+ if err := iter.ForEach(func(reference *plumbing.Reference) error {
+ // non-blocking cancellation check before counting each tag
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ default:
+ }
+ tagsCount += 1
+ return nil
+ }); err != nil {
+ return 0, err
+ }
+ return tagsCount, nil
+}
+
+// CountBranches counts the branch references of the repository.
+//
+// Local branches and remote-tracking references are both considered; the
+// reference whose name equals the name of the reference returned by Head()
+// is not counted. Iteration is aborted with ctx.Err() once ctx is done; on
+// any error the returned count is 0.
+func (r *GoGitRepo) CountBranches(ctx context.Context) (int, error) {
+	// Resolve HEAD before opening the reference iterator so that a failure
+	// here does not leave an open iterator behind.
+	headRef, err := r.repo.Head()
+	if err != nil {
+		return 0, err
+	}
+	refIter, err := r.repo.Storer.IterReferences()
+	if err != nil {
+		return 0, err
+	}
+	// NewReferenceFilteredIter returns no error, so nothing needs checking.
+	branchIter := storer.NewReferenceFilteredIter(
+		func(ref *plumbing.Reference) bool {
+			return ref.Name().IsBranch() || ref.Name().IsRemote()
+		}, refIter)
+
+	var branchesCount int
+	if err := branchIter.ForEach(func(reference *plumbing.Reference) error {
+		// non-blocking cancellation check before counting each reference
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+		if reference.Name() != headRef.Name() {
+			branchesCount++
+		}
+		return nil
+	}); err != nil {
+		return 0, err
+	}
+	return branchesCount, nil
+}
+
+// CountCommits returns the number of commit objects yielded by
+// repo.CommitObjects(). Iteration is aborted with ctx.Err() once ctx is
+// done; on any error the returned count is 0.
+func (r *GoGitRepo) CountCommits(ctx context.Context) (int, error) {
+ iter, err := r.repo.CommitObjects()
+ if err != nil {
+ return 0, err
+ }
+ var count int
+ if err := iter.ForEach(func(commit *object.Commit) error {
+ // non-blocking cancellation check before counting each commit
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ default:
+ }
+ count += 1
+ return nil
+ }); err != nil {
+ return 0, err
+ }
+ return count, nil
+}
+
+// CollectTags stores one code.Ref of type TAG per tag reference of the
+// repository and increments the subtask progress for each stored tag.
+// Iteration is aborted with the context error once the subtask context is
+// done.
+func (r *GoGitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) error {
+ tagIter, err := r.repo.Tags()
+ if err != nil {
+ return err
+ }
+ if err := tagIter.ForEach(func(ref *plumbing.Reference) error {
+ select {
+ case <-subtaskCtx.GetContext().Done():
+ return subtaskCtx.GetContext().Err()
+ default:
+ }
+ // Start from the hash the reference points at.
+ tagCommit := ref.Hash().String()
+ _, err := r.repo.CommitObject(ref.Hash())
+ // The hash is not a commit object (presumably an annotated tag - confirm):
+ // resolve the reference name to a commit hash instead.
+ // Errors other than ErrObjectNotFound are not handled by this condition.
+ if err != nil && errors.Is(err, plumbing.ErrObjectNotFound) {
+ h, err :=
r.repo.ResolveRevision(plumbing.Revision(ref.Name()))
+ if err != nil {
+ return err
+ }
+ tagCommit = h.String()
+ }
+ // The full reference name is used both in the id and as Name.
+ name := ref.Name().String()
+ if tagCommit != "" {
+ codeRef := &code.Ref{
+ DomainEntityExtended:
domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
+ RepoId: r.id,
+ Name: name,
+ CommitSha: tagCommit,
+ RefType: TAG,
+ }
+ err = r.store.Refs(codeRef)
+ if err != nil {
+ return err
+ }
+ subtaskCtx.IncProgress(1)
+ }
+ return nil
+ }); err != nil {
+ return err
+ }
+ return nil
+}
+
+// CollectBranches stores one code.Ref of type BRANCH for every local branch
+// and remote-tracking reference of the repository and increments the subtask
+// progress for each of them.
+//
+// The reference whose name equals the name of the reference returned by
+// Head() is flagged as the default one. Iteration is aborted with the
+// context error once the subtask context is done.
+func (r *GoGitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext) error {
+	// Resolve HEAD before opening the reference iterator so that a failure
+	// here does not leave an open iterator behind.
+	headRef, err := r.repo.Head()
+	if err != nil {
+		return err
+	}
+	refIter, err := r.repo.Storer.IterReferences()
+	if err != nil {
+		return err
+	}
+	// NewReferenceFilteredIter returns no error, so nothing needs checking.
+	branchIter := storer.NewReferenceFilteredIter(
+		func(ref *plumbing.Reference) bool {
+			return ref.Name().IsBranch() || ref.Name().IsRemote()
+		}, refIter)
+
+	return branchIter.ForEach(func(ref *plumbing.Reference) error {
+		select {
+		case <-subtaskCtx.GetContext().Done():
+			return subtaskCtx.GetContext().Err()
+		default:
+		}
+		name := ref.Name().Short()
+		sha := ref.Hash().String()
+		_, err := r.repo.CommitObject(ref.Hash())
+		if err != nil && errors.Is(err, plumbing.ErrObjectNotFound) {
+			// handle commit sha like "0000000000000000000000000000000000000000":
+			// resolve the reference name to a commit hash instead.
+			h, err := r.repo.ResolveRevision(plumbing.Revision(ref.Name()))
+			if err != nil {
+				return err
+			}
+			sha = h.String()
+		}
+		codeRef := &code.Ref{
+			DomainEntityExtended: domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
+			RepoId:               r.id,
+			Name:                 name,
+			CommitSha:            sha,
+			RefType:              BRANCH,
+			IsDefault:            ref.Name() == headRef.Name(),
+		}
+		if err := r.store.Refs(codeRef); err != nil {
+			return err
+		}
+		subtaskCtx.IncProgress(1)
+		return nil
+	})
+}
+
+// getComponentMap loads the code.Component rows of this repository and
+// returns a map from component name to its compiled PathRegex.
+//
+// The regular expressions come from the database, so an invalid pattern is
+// reported as an error instead of panicking the whole task (which is what
+// regexp.MustCompile would do).
+func (r *GoGitRepo) getComponentMap(subtaskCtx plugin.SubTaskContext) (map[string]*regexp.Regexp, error) {
+	db := subtaskCtx.GetDal()
+	components := make([]code.Component, 0)
+	err := db.All(&components, dal.From(components), dal.Where("repo_id= ?", r.id))
+	if err != nil {
+		return nil, err
+	}
+	componentMap := make(map[string]*regexp.Regexp, len(components))
+	for _, component := range components {
+		reg, err := regexp.Compile(component.PathRegex)
+		if err != nil {
+			return nil, fmt.Errorf("compiling path regex %q of component %q: %w", component.PathRegex, component.Name, err)
+		}
+		componentMap[component.Name] = reg
+	}
+	return componentMap, nil
+}
+
+// CollectCommits walks every commit object of the repository and stores, per
+// commit: its parents, a code.Commit with summed additions/deletions, a
+// code.RepoCommit linking it to this repo and, unless the SkipCommitFiles
+// config flag is set, its per-file diff against the first parent.
+// Iteration is aborted with the context error once the subtask context is
+// done; the first error encountered is returned.
+func (r *GoGitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) (err
error) {
+ // check it first
+ componentMap, err := r.getComponentMap(subtaskCtx)
+ if err != nil {
+ return err
+ }
+
+ skipCommitFiles := subtaskCtx.GetConfigReader().GetBool(SkipCommitFiles)
+ repo := r.repo
+ store := r.store
+
+ commitsObjectsIter, err := repo.CommitObjects()
+ if err != nil {
+ return err
+ }
+
+ if err := commitsObjectsIter.ForEach(func(commit *object.Commit) error {
+ select {
+ case <-subtaskCtx.GetContext().Done():
+ return subtaskCtx.GetContext().Err()
+ default:
+ }
+ commitSha := commit.Hash.String()
+ // The e-mail address doubles as author/committer id.
+ codeCommit := &code.Commit{
+ Sha: commitSha,
+ Message: commit.Message,
+ AuthorName: commit.Author.Name,
+ AuthorEmail: commit.Author.Email,
+ AuthorId: commit.Author.Email,
+ AuthoredDate: commit.Author.When,
+ CommitterName: commit.Committer.Name,
+ CommitterEmail: commit.Committer.Email,
+ CommitterId: commit.Committer.Email,
+ CommittedDate: commit.Committer.When,
+ }
+ // This assigns to the function's named result err (captured by the closure).
+ if err = r.storeParentCommits(commitSha, commit); err != nil {
+ return err
+ }
+
+ // From here on err is a new variable local to the closure.
+ stats, err := commit.StatsContext(subtaskCtx.GetContext())
+ if err != nil {
+ return err
+ } else {
+ for _, stat := range stats {
+ codeCommit.Additions += stat.Addition
+ // In some repos, deletion may be zero, which
is different from git log --stat.
+ // It seems go-git doesn't get the correct
changes.
+ // I have run object.DiffTreeWithOptions
manually with different diff algorithms,
+ // but get the same result with StatsContext.
+ // I cannot reproduce it with another repo.
+ // A similar issue:
https://github.com/go-git/go-git/issues/367
+ codeCommit.Deletions += stat.Deletion
+ }
+ }
+
+ err = store.Commits(codeCommit)
+ if err != nil {
+ return err
+ }
+
+ codeRepoCommit := &code.RepoCommit{
+ RepoId: r.id,
+ CommitSha: commitSha,
+ }
+ err = store.RepoCommits(codeRepoCommit)
+ if err != nil {
+ return err
+ }
+ if !skipCommitFiles {
+ if err :=
r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err
!= nil {
+ return err
+ }
+ }
+ subtaskCtx.IncProgress(1)
+ return nil
+ }); err != nil {
+ return err
+ }
+ // Naked return of the named result; every failure path above returned early.
+ return
+}
+
+// storeParentCommits stores one code.CommitParent per parent of commit,
+// linking commitSha to the parent's hash. A nil commit is a no-op. The
+// (possibly empty) list is handed to the store in a single call.
+func (r *GoGitRepo) storeParentCommits(commitSha string, commit
*object.Commit) error {
+ if commit == nil {
+ return nil
+ }
+ var commitParents []*code.CommitParent
+ for i := 0; i < commit.NumParents(); i++ {
+ parent, err := commit.Parent(i)
+ if err != nil {
+ return err
+ }
+ if parent != nil {
+ if parentCommitSha := parent.Hash.String();
parentCommitSha != "" {
+ commitParents = append(commitParents,
&code.CommitParent{
+ CommitSha: commitSha,
+ ParentCommitSha: parentCommitSha,
+ })
+ }
+ }
+ }
+ return r.store.CommitParents(commitParents)
+}
+
+// getCurrentAndParentTree returns the tree of commit and the tree of its
+// first parent, in that order. The parent tree is nil when the commit has
+// no parents.
+//
+// The trees are all the callers need, so no diff statistics are computed
+// here: doing so would diff every commit an extra time only to discard the
+// result. ctx is currently unused.
+func (r *GoGitRepo) getCurrentAndParentTree(ctx context.Context, commit *object.Commit) (*object.Tree, *object.Tree, error) {
+	commitTree, err := commit.Tree()
+	if err != nil {
+		return nil, nil, err
+	}
+	if commit.NumParents() == 0 {
+		return commitTree, nil, nil
+	}
+	// Parent(0) loads the first parent directly, without creating a parent
+	// iterator that would have to be closed.
+	firstParent, err := commit.Parent(0)
+	if err != nil {
+		return nil, nil, err
+	}
+	firstParentTree, err := firstParent.Tree()
+	if err != nil {
+		return nil, nil, err
+	}
+	return commitTree, firstParentTree, nil
+}
+
+// storeDiffCommitFilesComparedToParent diffs commit against its first parent
+// and stores, for every file stat of the resulting patch, a code.CommitFile
+// (with additions and deletions) and its code.CommitFileComponent.
+//
+// The first failure is returned to the caller. In particular an error from
+// store.CommitFiles is no longer swallowed: previously it was only logged
+// and the remaining files of the commit were silently skipped.
+func (r *GoGitRepo) storeDiffCommitFilesComparedToParent(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commit *object.Commit) (err error) {
+	commitTree, firstParentTree, err := r.getCurrentAndParentTree(subtaskCtx.GetContext(), commit)
+	if err != nil {
+		return err
+	}
+	// firstParentTree is nil for a commit without parents.
+	// NOTE(review): this relies on go-git accepting a nil receiver here and
+	// diffing against an empty tree - confirm.
+	patch, err := firstParentTree.PatchContext(subtaskCtx.GetContext(), commitTree)
+	if err != nil {
+		return err
+	}
+	commitSha := commit.Hash.String()
+	for _, p := range patch.Stats() {
+		// NOTE(review): p.Name is used verbatim as the file path; verify
+		// what go-git reports there for renamed files.
+		commitFile := &code.CommitFile{
+			CommitSha: commitSha,
+			FilePath:  p.Name,
+			Additions: p.Addition,
+			Deletions: p.Deletion,
+		}
+		commitFile.Id = genCommitFileId(commitFile.CommitSha, commitFile.FilePath)
+		if err := r.storeCommitFileComponents(subtaskCtx, componentMap, commitFile.Id, commitFile.FilePath); err != nil {
+			return err
+		}
+		if err := r.store.CommitFiles(commitFile); err != nil {
+			r.logger.Error(err, "CommitFiles error")
+			return err
+		}
+	}
+	return nil
+}
+
+// genCommitFileId builds the id of a commit file in the form
+// "<commitSha>:<hex-encoded sha256 of filePath>".
+// With long paths a varchar(255) is not enough to hold the path inside the
+// id, so the path is compressed to its fixed-length digest.
+func genCommitFileId(commitSha, filePath string) string {
+	digest := sha256.Sum256([]byte(filePath))
+	return commitSha + ":" + hex.EncodeToString(digest[:])
+}
+
+// storeCommitFileComponents stores the code.CommitFileComponent of one commit
+// file. The component is the name of a componentMap entry whose regular
+// expression matches commitFilePath, or "Default" when none matches.
+//
+// An error is returned when commitFileId or commitFilePath is empty.
+// subtaskCtx is currently unused.
+func (r *GoGitRepo) storeCommitFileComponents(subtaskCtx plugin.SubTaskContext, componentMap map[string]*regexp.Regexp, commitFileId string, commitFilePath string) error {
+	if commitFileId == "" || commitFilePath == "" {
+		return errors.Default.New("commit file id or commit file path is empty")
+	}
+	commitFileComponent := &code.CommitFileComponent{
+		CommitFileId:  commitFileId,
+		ComponentName: "Default",
+	}
+	// NOTE(review): map iteration order is not fixed, so when several
+	// patterns match the same path the chosen component can differ between
+	// runs - confirm the patterns are meant to be mutually exclusive.
+	for component, reg := range componentMap {
+		if reg.MatchString(commitFilePath) {
+			commitFileComponent.ComponentName = component
+			break
+		}
+	}
+	return r.store.CommitFileComponents(commitFileComponent)
+}
+
+// storeRepoSnapshot builds, for every file touched by the commits in
+// commitList, the list of commit hashes that last changed each line, and
+// stores one code.RepoSnapshot per line (line numbers start at 1).
+//
+// It depends on the commit list's order: commits are processed in sequence
+// and a later commit overwrites the blame recorded for a file by an earlier
+// one, so the list is expected to run from oldest to newest.
+func (r *GoGitRepo) storeRepoSnapshot(subtaskCtx plugin.SubTaskContext,
commitList []*object.Commit) error {
+ ctx := subtaskCtx.GetContext()
+ snapshot := make(map[string][]string) // {"filePathAndName": ["line1
commit sha", "line2 commit sha"]}
+ for _, commit := range commitList {
+ commitTree, firstParentTree, err :=
r.getCurrentAndParentTree(ctx, commit)
+ if err != nil {
+ return err
+ }
+ // firstParentTree is nil when the commit has no parents.
+ patch, err :=
firstParentTree.PatchContext(subtaskCtx.GetContext(), commitTree)
+ if err != nil {
+ return err
+ }
+ for _, p := range patch.Stats() {
+ fileName := p.Name
+ // This empty entry is always replaced by newBlames below.
+ if _, ok := snapshot[fileName]; !ok {
+ snapshot[fileName] = []string{}
+ }
+ // A full blame is computed for each file changed by each commit.
+ // NOTE(review): verify how Blame behaves when this commit deleted or
+ // renamed the file - an error here aborts the whole snapshot.
+ blameResults, err := gogit.Blame(commit, fileName)
+ if err != nil {
+ return err
+ }
+ var newBlames []string
+ for _, blameResult := range blameResults.Lines {
+ newBlames = append(newBlames,
blameResult.Hash.String())
+ }
+ snapshot[fileName] = newBlames
+ }
+ }
+ // store snapshots
+ for fileName, lineBlames := range snapshot {
+ for idx, lineBlameHash := range lineBlames {
+ lineNo := idx + 1
+ repoSnapshot := &code.RepoSnapshot{
+ RepoId: r.id,
+ CommitSha: lineBlameHash,
+ FilePath: fileName,
+ LineNo: lineNo,
+ }
+ if err := r.store.RepoSnapshot(repoSnapshot); err !=
nil {
+ r.logger.Error(err, "store RepoSnapshot error")
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+// GetCommitList returns the first-parent history of HEAD, ordered from the
+// oldest commit to the HEAD commit.
+//
+// The walk is aborted with the context error once the subtask context is
+// done.
+func (r *GoGitRepo) GetCommitList(subtaskCtx plugin.SubTaskContext) ([]*object.Commit, error) {
+	ctx := subtaskCtx.GetContext()
+	// get the reference HEAD currently points at
+	headRef, err := r.repo.Head()
+	if err != nil {
+		return nil, err
+	}
+	// get head commit object and add into commitList
+	commit, err := r.repo.CommitObject(headRef.Hash())
+	if err != nil {
+		return nil, err
+	}
+	commitList := []*object.Commit{commit}
+	// follow the first parent until a commit without parents is reached
+	for commit.NumParents() > 0 {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		default:
+		}
+		// Parent already returns the loaded commit object, so it does not
+		// have to be looked up a second time by hash.
+		commit, err = commit.Parent(0)
+		if err != nil {
+			return nil, err
+		}
+		commitList = append(commitList, commit)
+	}
+	// reverse commitList
+	// use slices.Reverse(commitList) in higher golang version.
+	for i, j := 0, len(commitList)-1; i < j; i, j = i+1, j-1 {
+		commitList[i], commitList[j] = commitList[j], commitList[i]
+	}
+	return commitList, nil
+}
+
+// CollectDiffLine builds the commit list with GetCommitList and stores the
+// per-line repo snapshot computed from it. CommitLineChange records are not
+// collected by this implementation.
+// NOTE(review): the libgit2 implementation deleted the existing RepoSnapshot
+// rows of the repo before storing new ones; nothing here does - confirm that
+// stale rows cannot remain.
+func (r *GoGitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext) error {
+ commitList, err := r.GetCommitList(subtaskCtx)
+ if err != nil {
+ return err
+ }
+ if err := r.storeRepoSnapshot(subtaskCtx, commitList); err != nil {
+ return err
+ }
+ // fixme: collecting CommitLineChange is not implemented.
+ // There is no way to get such information with go-git, and table
commit_line_change is not used by any dashboards
+ // So we just ignore it.
+ return nil
+}
diff --git a/backend/plugins/gitextractor/parser/repo.go
b/backend/plugins/gitextractor/parser/repo_libgit2.go
similarity index 96%
copy from backend/plugins/gitextractor/parser/repo.go
copy to backend/plugins/gitextractor/parser/repo_libgit2.go
index 1a3a90d22..43df1401e 100644
--- a/backend/plugins/gitextractor/parser/repo.go
+++ b/backend/plugins/gitextractor/parser/repo_libgit2.go
@@ -22,10 +22,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
- "regexp"
- "sort"
- "strconv"
-
"github.com/apache/incubator-devlake/core/config"
"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/errors"
@@ -34,6 +30,9 @@ import (
"github.com/apache/incubator-devlake/core/models/domainlayer/code"
"github.com/apache/incubator-devlake/core/plugin"
"github.com/apache/incubator-devlake/plugins/gitextractor/models"
+ "regexp"
+ "sort"
+ "strconv"
git "github.com/libgit2/git2go/v33"
)
@@ -43,15 +42,23 @@ const SkipCommitFiles = "SKIP_COMMIT_FILES"
var TypeNotMatchError = "the requested type does not match the type in the ODB"
type GitRepo struct {
+ id string
+ logger log.Logger
+
store models.Store
- logger log.Logger
- id string
repo *git.Repository
cleanup func()
}
+func (r *GitRepo) SetCleanUp(f func()) error {
+ if f != nil {
+ r.cleanup = f
+ }
+ return nil
+}
+
// CollectAll The main parser subtask
-func (r *GitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) errors.Error {
+func (r *GitRepo) CollectAll(subtaskCtx plugin.SubTaskContext) error {
subtaskCtx.SetProgress(0, -1)
err := r.CollectTags(subtaskCtx)
if err != nil {
@@ -69,7 +76,7 @@ func (r *GitRepo) CollectAll(subtaskCtx
plugin.SubTaskContext) errors.Error {
}
// Close resources
-func (r *GitRepo) Close() errors.Error {
+func (r *GitRepo) Close(ctx context.Context) error {
defer func() {
if r.cleanup != nil {
r.cleanup()
@@ -79,7 +86,7 @@ func (r *GitRepo) Close() errors.Error {
}
// CountTags Count git tags subtask
-func (r *GitRepo) CountTags() (int, errors.Error) {
+func (r *GitRepo) CountTags(ctx context.Context) (int, error) {
tags, err := r.repo.Tags.List()
if err != nil {
return 0, errors.Convert(err)
@@ -88,7 +95,7 @@ func (r *GitRepo) CountTags() (int, errors.Error) {
}
// CountBranches count the number of branches in a git repo
-func (r *GitRepo) CountBranches(ctx context.Context) (int, errors.Error) {
+func (r *GitRepo) CountBranches(ctx context.Context) (int, error) {
var branchIter *git.BranchIterator
branchIter, err := r.repo.NewBranchIterator(git.BranchAll)
if err != nil {
@@ -113,7 +120,7 @@ func (r *GitRepo) CountBranches(ctx context.Context) (int,
errors.Error) {
}
// CountCommits count the number of commits in a git repo
-func (r *GitRepo) CountCommits(ctx context.Context) (int, errors.Error) {
+func (r *GitRepo) CountCommits(ctx context.Context) (int, error) {
odb, err := r.repo.Odb()
if err != nil {
return 0, errors.Convert(err)
@@ -138,7 +145,7 @@ func (r *GitRepo) CountCommits(ctx context.Context) (int,
errors.Error) {
}
// CollectTags Collect Tags data
-func (r *GitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) errors.Error {
+func (r *GitRepo) CollectTags(subtaskCtx plugin.SubTaskContext) error {
return errors.Convert(r.repo.Tags.Foreach(func(name string, id
*git.Oid) error {
select {
case <-subtaskCtx.GetContext().Done():
@@ -157,7 +164,7 @@ func (r *GitRepo) CollectTags(subtaskCtx
plugin.SubTaskContext) errors.Error {
} else {
tagCommit = id.String()
}
- r.logger.Info("tagCommit:%s", tagCommit)
+ r.logger.Debug("tagCommit: %s", tagCommit)
if tagCommit != "" {
ref := &code.Ref{
DomainEntityExtended:
domainlayer.DomainEntityExtended{Id: fmt.Sprintf("%s:%s", r.id, name)},
@@ -177,7 +184,7 @@ func (r *GitRepo) CollectTags(subtaskCtx
plugin.SubTaskContext) errors.Error {
}
// CollectBranches Collect branch data
-func (r *GitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext)
errors.Error {
+func (r *GitRepo) CollectBranches(subtaskCtx plugin.SubTaskContext) error {
var repoInter *git.BranchIterator
repoInter, err := r.repo.NewBranchIterator(git.BranchAll)
if err != nil {
@@ -205,6 +212,7 @@ func (r *GitRepo) CollectBranches(subtaskCtx
plugin.SubTaskContext) errors.Error
CommitSha: sha,
RefType: BRANCH,
}
+ // commit sha may be an empty string, it's unexpected.
ref.IsDefault, err1 = branch.IsHead()
if err1 != nil && err1.Error() != TypeNotMatchError {
return err1
@@ -221,7 +229,7 @@ func (r *GitRepo) CollectBranches(subtaskCtx
plugin.SubTaskContext) errors.Error
}
// CollectCommits Collect data from each commit, we can also get the diff line
-func (r *GitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext)
errors.Error {
+func (r *GitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) error {
opts, err := getDiffOpts()
if err != nil {
return err
@@ -285,8 +293,10 @@ func (r *GitRepo) CollectCommits(subtaskCtx
plugin.SubTaskContext) errors.Error
if stats, err = r.getDiffComparedToParent(c.Sha, commit,
parent, opts, componentMap); err != nil {
return err
}
+ r.logger.Debug("state: %#+v\n", stats.Deletions())
c.Additions += stats.Insertions()
c.Deletions += stats.Deletions()
+
err = r.store.Commits(c)
if err != nil {
return err
@@ -418,14 +428,14 @@ func (r *GitRepo) storeCommitFilesFromDiff(commitSha
string, diff *git.Diff, com
}
// CollectDiffLine get line diff data from a specific branch
-func (r *GitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext)
errors.Error {
+func (r *GitRepo) CollectDiffLine(subtaskCtx plugin.SubTaskContext) error {
//Using this subtask,we can get every line change in every commit.
//We maintain a snapshot structure to get which commit each deleted
line belongs to
snapshot := make(map[string] /*file path*/ *models.FileBlame)
repo := r.repo
//step 1. get the reverse commit list
commitList := make([]git.Commit, 0)
- //get currently head commitsha, dafault is master branch
+ // get current head commit sha, default is master branch
// check branch, if not master, checkout to branch's head
commitOid, err1 := repo.Head()
if err1 != nil && err1.Error() != TypeNotMatchError {
@@ -485,7 +495,7 @@ func (r *GitRepo) CollectDiffLine(subtaskCtx
plugin.SubTaskContext) errors.Error
var lastFile string
lastFile = ""
err = diff.ForEach(func(file git.DiffDelta, progress
float64) (git.DiffForEachHunkCallback, error) {
- //if doesn't exist in snapshot, create a new one
+ // if it doesn't exist in snapshot, create a
new one
if _, ok := snapshot[file.OldFile.Path]; !ok {
fileBlame, err := models.NewFileBlame()
if err != nil {
@@ -498,7 +508,7 @@ func (r *GitRepo) CollectDiffLine(subtaskCtx
plugin.SubTaskContext) errors.Error
lastFile = file.NewFile.Path
} else if lastFile != file.NewFile.Path {
updateSnapshotFileBlame(curcommit,
deleted, added, lastFile, snapshot)
- //reset the deleted and added,last_file
now is current file
+ // reset the deleted and
added,last_file now is current file
deleted = make([]git.DiffLine, 0)
added = make([]git.DiffLine, 0)
lastFile = file.NewFile.Path
diff --git a/backend/plugins/gitextractor/parser/repo_test.go
b/backend/plugins/gitextractor/parser/repo_test.go
new file mode 100644
index 000000000..205bfc783
--- /dev/null
+++ b/backend/plugins/gitextractor/parser/repo_test.go
@@ -0,0 +1,302 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package parser
+
+import (
+ "context"
+ "fmt"
+ "github.com/apache/incubator-devlake/core/errors"
+ "github.com/apache/incubator-devlake/core/log"
+ "github.com/apache/incubator-devlake/impls/logruslog"
+ "github.com/apache/incubator-devlake/plugins/gitextractor/models"
+ "github.com/apache/incubator-devlake/plugins/gitextractor/store"
+ "github.com/stretchr/testify/assert"
+ "os"
+ "path/filepath"
+ "testing"
+)
+
+var (
+ enableRepoTest = false
+ repoId = "test-repo-id"
+ runInLocal = true
+ ctx = context.Background()
+ subTaskCtx = &testSubTaskContext{}
+ repoIncubatorDevlakeRemoteURL =
"https://github.com/apache/incubator-devlake"
+ output = "./output"
+
+ repoIncubatorApacheDevlakeWebsite = ""
+ simpleRepo = ""
+
+ logger log.Logger
+
+ storage models.Store
+ gitRepoCreator *GitRepoCreator
+
+ goGitStorage models.Store
+ goGitRepoCreator *GitRepoCreator
+)
+
+func TestMain(m *testing.M) {
+ if !enableRepoTest {
+ return
+ }
+ fmt.Println("test main starts")
+
+ if path, err := os.Getwd(); err != nil {
+ panic(err)
+ } else {
+ repoIncubatorApacheDevlakeWebsite = filepath.Join(path,
"test_repo", "website")
+ simpleRepo = filepath.Join(path, "test_repo", "demo")
+ fmt.Println("simple repo and website repo path have been
initialised.")
+ }
+
+ logger = logruslog.Global.Nested("git extractor")
+ fmt.Println("logger inited")
+
+ clearOutput()
+
+ var err error
+ storage, err = store.NewCsvStore(output + "_libgit2")
+ if err != nil {
+ panic(err)
+ }
+ defer storage.Close()
+ fmt.Println("git storage inited")
+ gitRepoCreator = NewGitRepoCreator(storage, logger)
+
+ goGitStorage, err = store.NewCsvStore(output + "_gogit")
+ if err != nil {
+ panic(err)
+ }
+ defer goGitStorage.Close()
+ fmt.Println("go git storage inited")
+ goGitRepoCreator = NewGitRepoCreator(goGitStorage, logger)
+
+ fmt.Printf("test main run success\n\tlogger: %+v\tstorage: %+v\tgogit
storage: %+v\n", logger, storage, goGitStorage)
+ m.Run()
+}
+
+func getRepos(localRepoDir string) (RepoCollector, RepoCollector) {
+ var gitRepo RepoCollector
+ var goGitRepo RepoCollector
+ var err errors.Error
+
+ if runInLocal {
+ repoPath := localRepoDir
+ gitRepo, err = gitRepoCreator.LocalRepo(repoPath, repoId)
+ if err != nil {
+ panic(err)
+ }
+ goGitRepo, err = goGitRepoCreator.LocalGoGitRepo(repoPath,
repoId)
+ if err != nil {
+ panic(err)
+ }
+ } else {
+ gitRepo, err = gitRepoCreator.CloneOverHTTP(subTaskCtx, repoId,
repoIncubatorDevlakeRemoteURL, "", "", "")
+ if err != nil {
+ panic(err)
+ }
+ goGitRepo, err =
goGitRepoCreator.CloneGoGitRepoOverHTTP(subTaskCtx, repoId,
repoIncubatorDevlakeRemoteURL, "", "", "")
+ if err != nil {
+ panic(err)
+ }
+ }
+ return goGitRepo, gitRepo
+}
+
+func TestGitRepo_CountRepoInfo(t *testing.T) {
+ if !enableRepoTest {
+ return
+ }
+ goGitRepo, gitRepo := getRepos(repoIncubatorApacheDevlakeWebsite)
+
+ {
+ tagsCount1, err1 := gitRepo.CountTags(ctx)
+ if err1 != nil {
+ panic(err1)
+ }
+ tagsCount2, err2 := goGitRepo.CountTags(ctx)
+ if err2 != nil {
+ panic(err2)
+ }
+ t.Logf("[tagsCount] libgit2 result: %d, gogit result: %d",
tagsCount1, tagsCount2)
+ assert.Equalf(t, tagsCount1, tagsCount2, "unexpected")
+ }
+
+ {
+ branchesCount1, err1 := gitRepo.CountBranches(ctx)
+ if err1 != nil {
+ panic(err1)
+ }
+ branchesCount2, err2 := goGitRepo.CountBranches(ctx)
+ if err2 != nil {
+ panic(err2)
+ }
+ t.Logf("[branchesCount] libgit2 result: %d, gogit result: %d",
branchesCount1, branchesCount2)
+ assert.Equalf(t, branchesCount1, branchesCount2, "unexpected")
+ }
+
+ {
+ commitCount1, err1 := gitRepo.CountCommits(ctx)
+ if err1 != nil {
+ panic(err1)
+ }
+ commitCount2, err2 := goGitRepo.CountCommits(ctx)
+ if err2 != nil {
+ panic(err2)
+ }
+ t.Logf("[commitCount] libgit2 result: %d, gogit result: %d",
commitCount1, commitCount2)
+ assert.Equalf(t, commitCount1, commitCount2, "unexpected")
+ }
+
+}
+
+// all tests pass
+func TestGitRepo_CollectRepoInfo(t *testing.T) {
+ if !enableRepoTest {
+ return
+ }
+ goGitRepo, gitRepo := getRepos(simpleRepo)
+
+ {
+ // finished
+ subTaskCtxCollectTags := &testSubTaskContext{}
+ if err1 := gitRepo.CollectTags(subTaskCtxCollectTags); err1 !=
nil {
+ panic(err1)
+ }
+ subTaskCtxCollectTagsWithGoGit := &testSubTaskContext{}
+ if err2 :=
goGitRepo.CollectTags(subTaskCtxCollectTagsWithGoGit); err2 != nil {
+ panic(err2)
+ }
+ t.Logf("[CollectTags] libgit2 result: %+v, gogit result: %+v",
subTaskCtxCollectTags, subTaskCtxCollectTagsWithGoGit)
+ assert.Equalf(t, subTaskCtxCollectTags.total,
subTaskCtxCollectTagsWithGoGit.total, "unexpected")
+ }
+
+ {
+ // finished
+ subTaskCtxCollectBranches := &testSubTaskContext{}
+ if err1 := gitRepo.CollectBranches(subTaskCtxCollectBranches);
err1 != nil {
+ panic(err1)
+ }
+ subTaskCtxCollectBranchesWithGoGit := &testSubTaskContext{}
+ if err2 :=
goGitRepo.CollectBranches(subTaskCtxCollectBranchesWithGoGit); err2 != nil {
+ panic(err2)
+ }
+ t.Logf("[CollectBranches] libgit2 result: %+v, gogit result:
%+v", subTaskCtxCollectBranches, subTaskCtxCollectBranchesWithGoGit)
+ assert.Equalf(t, subTaskCtxCollectBranches.total,
subTaskCtxCollectBranchesWithGoGit.total, "unexpected")
+ }
+
+ {
+ subTaskCtxCollectCommits := &testSubTaskContext{}
+ if err1 := gitRepo.CollectCommits(subTaskCtxCollectCommits);
err1 != nil {
+ panic(err1)
+ }
+ subTaskCtxCCollectCommitsWithGoGit := &testSubTaskContext{}
+ if err2 :=
goGitRepo.CollectCommits(subTaskCtxCCollectCommitsWithGoGit); err2 != nil {
+ panic(err2)
+ }
+
+ t.Logf("[CollectCommits] libgit2 result: %+v, gogit result:
%+v", subTaskCtxCollectCommits, subTaskCtxCCollectCommitsWithGoGit)
+ fmt.Println(subTaskCtxCollectCommits.total,
subTaskCtxCCollectCommitsWithGoGit.total)
+ assert.Equalf(t, subTaskCtxCollectCommits.total,
subTaskCtxCCollectCommitsWithGoGit.total, "unexpected")
+ }
+
+ {
+ subTaskCtxCollectDiffLine := &testSubTaskContext{}
+ if err1 := gitRepo.CollectDiffLine(subTaskCtxCollectDiffLine);
err1 != nil {
+ panic(err1)
+ }
+ subTaskCtxCollectDiffLineWithGoGit := &testSubTaskContext{}
+ if err2 :=
goGitRepo.CollectDiffLine(subTaskCtxCollectDiffLineWithGoGit); err2 != nil {
+ panic(err2)
+ }
+
+ t.Logf("[CollectDiffLine] libgit2 result: %+v, gogit result:
%+v", subTaskCtxCollectDiffLine, subTaskCtxCollectDiffLineWithGoGit)
+ fmt.Println(subTaskCtxCollectDiffLine.total,
subTaskCtxCollectDiffLineWithGoGit.total)
+ assert.Equalf(t, subTaskCtxCollectDiffLine.total,
subTaskCtxCollectDiffLineWithGoGit.total, "unexpected")
+ }
+}
+
+func clearOutput() {
+ os.RemoveAll(fmt.Sprintf("./output_libgit2"))
+ os.RemoveAll(fmt.Sprintf("./output_gogit"))
+}
+
+func TestGitRepo_CollectCommits(t *testing.T) {
+ if !enableRepoTest {
+ return
+ }
+ repoPath := simpleRepo
+ gitRepo, err := gitRepoCreator.LocalRepo(repoPath, repoId)
+ if err != nil {
+ panic(err)
+ }
+ goGitRepo, err := goGitRepoCreator.LocalGoGitRepo(repoPath, repoId)
+ if err != nil {
+ panic(err)
+ }
+
+ {
+ subTaskCtxCollectCommits := &testSubTaskContext{}
+ if err1 := gitRepo.CollectCommits(subTaskCtxCollectCommits);
err1 != nil {
+ panic(err1)
+ }
+
+ subTaskCtxCCollectCommitsWithGoGit := &testSubTaskContext{}
+ if err2 :=
goGitRepo.CollectCommits(subTaskCtxCCollectCommitsWithGoGit); err2 != nil {
+ panic(err2)
+ }
+
+ t.Logf("[CollectCommits] libgit2 result: %+v, gogit result:
%+v", subTaskCtxCollectCommits, subTaskCtxCCollectCommitsWithGoGit)
+ fmt.Println(subTaskCtxCollectCommits.total,
subTaskCtxCCollectCommitsWithGoGit.total)
+ assert.Equalf(t, subTaskCtxCollectCommits.total,
subTaskCtxCCollectCommitsWithGoGit.total, "unexpected")
+ }
+}
+
+func TestGitRepo_CollectDiffLine(t *testing.T) {
+ if !enableRepoTest {
+ return
+ }
+ repoPath := simpleRepo
+ gitRepo, err := gitRepoCreator.LocalRepo(repoPath, repoId)
+ if err != nil {
+ panic(err)
+ }
+ goGitRepo, err := goGitRepoCreator.LocalGoGitRepo(repoPath, repoId)
+ if err != nil {
+ panic(err)
+ }
+
+ {
+ subTaskCtxCollectDiffLine := &testSubTaskContext{}
+ if err1 := gitRepo.CollectDiffLine(subTaskCtxCollectDiffLine);
err1 != nil {
+ panic(err1)
+ }
+ //t.Logf("[CollectDiffLine] libgit2 result: %+v",
subTaskCtxCollectDiffLine)
+
+ subTaskCtxCollectDiffLineWithGoGit := &testSubTaskContext{}
+ if err2 :=
goGitRepo.CollectDiffLine(subTaskCtxCollectDiffLineWithGoGit); err2 != nil {
+ panic(err2)
+ }
+
+ t.Logf("[CollectCommits] libgit2 result: %+v, gogit result:
%+v", subTaskCtxCollectDiffLine, subTaskCtxCollectDiffLineWithGoGit)
+ fmt.Println(subTaskCtxCollectDiffLine.total,
subTaskCtxCollectDiffLineWithGoGit.total)
+ assert.Equalf(t, subTaskCtxCollectDiffLine.total,
subTaskCtxCollectDiffLineWithGoGit.total, "unexpected")
+ }
+}
diff --git a/backend/plugins/gitextractor/store/csv.go
b/backend/plugins/gitextractor/store/csv.go
index 7d26008f6..8701501c5 100644
--- a/backend/plugins/gitextractor/store/csv.go
+++ b/backend/plugins/gitextractor/store/csv.go
@@ -190,5 +190,11 @@ func (c *CsvStore) Close() errors.Error {
if c.snapshotWriter != nil {
c.snapshotWriter.Close()
}
+ if c.commitFileComponentWriter != nil {
+ c.commitFileComponentWriter.Close()
+ }
+ if c.commitLineChangeWriter != nil {
+ c.commitLineChangeWriter.Close()
+ }
return nil
}
diff --git a/backend/plugins/gitextractor/tasks/clone.go
b/backend/plugins/gitextractor/tasks/clone.go
index 87479c328..0b88e2ff6 100644
--- a/backend/plugins/gitextractor/tasks/clone.go
+++ b/backend/plugins/gitextractor/tasks/clone.go
@@ -28,6 +28,8 @@ import (
"strings"
)
+const useGoGitInGitExtractor = "USE_GO_GIT_IN_GIT_EXTRACTOR"
+
var CloneGitRepoMeta = plugin.SubTaskMeta{
Name: "cloneGitRepo",
EntryPoint: CloneGitRepo,
@@ -37,6 +39,16 @@ var CloneGitRepoMeta = plugin.SubTaskMeta{
DomainTypes: []string{plugin.DOMAIN_TYPE_CODE},
}
+func useGoGit(subTaskCtx plugin.SubTaskContext, taskData
*GitExtractorTaskData) bool {
+ if subTaskCtx.GetConfigReader().GetBool(useGoGitInGitExtractor) {
+ return true
+ }
+ if taskData != nil && taskData.Options.UseGoGit {
+ return true
+ }
+ return false
+}
+
func CloneGitRepo(subTaskCtx plugin.SubTaskContext) errors.Error {
taskData, ok := subTaskCtx.GetData().(*GitExtractorTaskData)
if !ok {
@@ -44,7 +56,16 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext)
errors.Error {
}
op := taskData.Options
storage := store.NewDatabase(subTaskCtx, op.RepoId)
- repo, err := NewGitRepo(subTaskCtx, subTaskCtx.GetLogger(), storage, op)
+ var repo parser.RepoCollector
+ var err errors.Error
+ logger := subTaskCtx.GetLogger()
+ if useGoGit(subTaskCtx, taskData) {
+ logger.Info("use go-git in gitextractor")
+ repo, err = NewGoGitRepo(subTaskCtx, logger, storage, op)
+ } else {
+ logger.Info("use libgit2 in gitextractor")
+ repo, err = NewGitRepo(subTaskCtx, logger, storage, op)
+ }
if err != nil {
return err
}
@@ -54,9 +75,9 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext)
errors.Error {
}
// NewGitRepo create and return a new parser git repo
-func NewGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage
models.Store, op *GitExtractorOptions) (*parser.GitRepo, errors.Error) {
+func NewGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage
models.Store, op *GitExtractorOptions) (parser.RepoCollector, errors.Error) {
var err errors.Error
- var repo *parser.GitRepo
+ var repo parser.RepoCollector
p := parser.NewGitRepoCreator(storage, logger)
if strings.HasPrefix(op.Url, "http") {
repo, err = p.CloneOverHTTP(ctx, op.RepoId, op.Url, op.User,
op.Password, op.Proxy)
@@ -69,3 +90,20 @@ func NewGitRepo(ctx plugin.SubTaskContext, logger
log.Logger, storage models.Sto
}
return repo, err
}
+
+// NewGoGitRepo creates and returns a new parser git repo backed by go-git
+func NewGoGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage
models.Store, op *GitExtractorOptions) (parser.RepoCollector, errors.Error) {
+ var err errors.Error
+ var repo parser.RepoCollector
+ p := parser.NewGitRepoCreator(storage, logger)
+ if strings.HasPrefix(op.Url, "http") {
+ repo, err = p.CloneGoGitRepoOverHTTP(ctx, op.RepoId, op.Url,
op.User, op.Password, op.Proxy)
+ } else if url := strings.TrimPrefix(op.Url, "ssh://");
strings.HasPrefix(url, "git@") {
+ repo, err = p.CloneGoGitRepoOverSSH(ctx, op.RepoId, url,
op.PrivateKey, op.Passphrase)
+ } else if strings.HasPrefix(op.Url, "/") {
+ repo, err = p.LocalGoGitRepo(op.Url, op.RepoId)
+ } else {
+ return nil, errors.BadInput.New(fmt.Sprintf("unsupported url
[%s]", op.Url))
+ }
+ return repo, err
+}
diff --git a/backend/plugins/gitextractor/tasks/git_repo_collector.go
b/backend/plugins/gitextractor/tasks/git_repo_collector.go
index d5132fa6c..5af4fe98a 100644
--- a/backend/plugins/gitextractor/tasks/git_repo_collector.go
+++ b/backend/plugins/gitextractor/tasks/git_repo_collector.go
@@ -27,7 +27,7 @@ import (
type GitExtractorTaskData struct {
Options *GitExtractorOptions
- GitRepo *parser.GitRepo
+ GitRepo parser.RepoCollector
}
type GitExtractorOptions struct {
@@ -39,6 +39,7 @@ type GitExtractorOptions struct {
PrivateKey string `json:"privateKey"`
Passphrase string `json:"passphrase"`
Proxy string `json:"proxy"`
+ UseGoGit bool `json:"use_go_git"`
}
func (o GitExtractorOptions) Valid() errors.Error {
@@ -60,11 +61,11 @@ func CollectGitCommits(subTaskCtx plugin.SubTaskContext)
errors.Error {
if count, err := repo.CountCommits(subTaskCtx.GetContext()); err != nil
{
subTaskCtx.GetLogger().Error(err, "unable to get commit count")
subTaskCtx.SetProgress(0, -1)
- return err
+ return errors.Convert(err)
} else {
subTaskCtx.SetProgress(0, count)
}
- return repo.CollectCommits(subTaskCtx)
+ return errors.Convert(repo.CollectCommits(subTaskCtx))
}
func CollectGitBranches(subTaskCtx plugin.SubTaskContext) errors.Error {
@@ -72,38 +73,38 @@ func CollectGitBranches(subTaskCtx plugin.SubTaskContext)
errors.Error {
if count, err := repo.CountBranches(subTaskCtx.GetContext()); err !=
nil {
subTaskCtx.GetLogger().Error(err, "unable to get branch count")
subTaskCtx.SetProgress(0, -1)
- return err
+ return errors.Convert(err)
} else {
subTaskCtx.SetProgress(0, count)
}
- return repo.CollectBranches(subTaskCtx)
+ return errors.Convert(repo.CollectBranches(subTaskCtx))
}
func CollectGitTags(subTaskCtx plugin.SubTaskContext) errors.Error {
repo := getGitRepo(subTaskCtx)
- if count, err := repo.CountTags(); err != nil {
+ if count, err := repo.CountTags(subTaskCtx.GetContext()); err != nil {
subTaskCtx.GetLogger().Error(err, "unable to get tag count")
subTaskCtx.SetProgress(0, -1)
- return err
+ return errors.Convert(err)
} else {
subTaskCtx.SetProgress(0, count)
}
- return repo.CollectTags(subTaskCtx)
+ return errors.Convert(repo.CollectTags(subTaskCtx))
}
func CollectGitDiffLines(subTaskCtx plugin.SubTaskContext) errors.Error {
repo := getGitRepo(subTaskCtx)
- if count, err := repo.CountTags(); err != nil {
+ if count, err := repo.CountTags(subTaskCtx.GetContext()); err != nil {
subTaskCtx.GetLogger().Error(err, "unable to get line content")
subTaskCtx.SetProgress(0, -1)
- return err
+ return errors.Convert(err)
} else {
subTaskCtx.SetProgress(0, count)
}
- return repo.CollectDiffLine(subTaskCtx)
+ return errors.Convert(repo.CollectDiffLine(subTaskCtx))
}
-func getGitRepo(subTaskCtx plugin.SubTaskContext) *parser.GitRepo {
+func getGitRepo(subTaskCtx plugin.SubTaskContext) parser.RepoCollector {
taskData, ok := subTaskCtx.GetData().(*GitExtractorTaskData)
if !ok {
panic("git repo reference not found on context")
diff --git a/env.example b/env.example
index e615f98d7..aab1ff22a 100755
--- a/env.example
+++ b/env.example
@@ -64,6 +64,10 @@ ENCRYPTION_SECRET=
IN_SECURE_SKIP_VERIFY=
##########################
+# In the gitextractor plugin, use go-git to collect the repo's data
+##########################
+USE_GO_GIT_IN_GIT_EXTRACTOR=false
+
# Set if response error when requesting /connections/{connection_id}/test
should be wrapped or not
##########################
WRAP_RESPONSE_ERROR=