This is an automated email from the ASF dual-hosted git repository.

klesh pushed a commit to branch kw-optimize-git-clone
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git

commit 79950463849c6870b5ac08a42393fa15c94c0f76
Author: Klesh Wong <[email protected]>
AuthorDate: Mon Apr 1 18:25:56 2024 +0800

    feat(gitextractor): skip commit stat to speed up pipeline
---
 backend/plugins/gitextractor/gitextractor.go       | 60 ++++++++++++++
 backend/plugins/gitextractor/impl/impl.go          | 27 +++++-
 backend/plugins/gitextractor/main.go               | 95 ----------------------
 backend/plugins/gitextractor/parser/repo_gogit.go  | 35 ++++----
 .../plugins/gitextractor/parser/repo_libgit2.go    | 28 +++----
 backend/plugins/gitextractor/parser/taskdata.go    | 57 +++++++++++++
 backend/plugins/gitextractor/tasks/clone.go        |  8 +-
 .../gitextractor/tasks/git_repo_collector.go       | 35 +-------
 8 files changed, 177 insertions(+), 168 deletions(-)

diff --git a/backend/plugins/gitextractor/gitextractor.go b/backend/plugins/gitextractor/gitextractor.go
new file mode 100644
index 000000000..5032381aa
--- /dev/null
+++ b/backend/plugins/gitextractor/gitextractor.go
@@ -0,0 +1,60 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main // must be main for plugin entry point
+
+import (
+       "github.com/apache/incubator-devlake/core/runner"
+       "github.com/apache/incubator-devlake/plugins/gitextractor/impl"
+       "github.com/spf13/cobra"
+)
+
+var PluginEntry impl.GitExtractor //nolint
+
+// main runs gitextractor standalone for debugging: CLI flags become the plugin options passed to runner.DirectRun.
+func main() {
+       cmd := &cobra.Command{Use: "gitextractor"}
+       url := cmd.Flags().StringP("url", "l", "", "repo url")
+       repoId := cmd.Flags().StringP("repoId", "i", "", "domain layer repo id")
+       user := cmd.Flags().StringP("user", "u", "", "username")
+       password := cmd.Flags().StringP("password", "p", "", "password")
+       // pk := cmd.Flags().StringP("privateKey", "k", "", "private key file")
+       // pkPass := cmd.Flags().StringP("privateKeyPassPhrase", "P", "", "passphrase for private key")
+       proxy := cmd.Flags().StringP("proxy", "x", "", "proxy")
+       useGoGit := cmd.Flags().BoolP("useGoGit", "g", false, "use go-git instead of libgit2")
+       skipCommitStat := cmd.Flags().BoolP("skipCommitStat", "S", true, "")
+       skipCommitFiles := cmd.Flags().BoolP("skipCommitFiles", "F", true, "")
+       timeAfter := cmd.Flags().StringP("timeAfter", "a", "", "collect data that are created after specified time, ie 2006-01-02T15:04:05Z")
+       _ = cmd.MarkFlagRequired("url")
+       _ = cmd.MarkFlagRequired("repoId")
+
+       cmd.Run = func(c *cobra.Command, args []string) {
+               runner.DirectRun(c, args, PluginEntry, map[string]interface{}{
+                       "url":      *url,
+                       "repoId":   *repoId,
+                       "user":     *user,
+                       "password": *password,
+                       // privateKey and passphrase are not forwarded: their flags
+                       // above are still commented out.
+                       "proxy":           *proxy,
+                       "useGoGit":        *useGoGit,
+                       "skipCommitStat":  *skipCommitStat,  // dereferenced like every other flag
+                       "skipCommitFiles": *skipCommitFiles, // dereferenced like every other flag
+               }, *timeAfter)
+       }
+       runner.RunCmd(cmd)
+}
diff --git a/backend/plugins/gitextractor/impl/impl.go b/backend/plugins/gitextractor/impl/impl.go
index d8d5413d4..3f7f409c8 100644
--- a/backend/plugins/gitextractor/impl/impl.go
+++ b/backend/plugins/gitextractor/impl/impl.go
@@ -22,6 +22,7 @@ import (
        "github.com/apache/incubator-devlake/core/errors"
        "github.com/apache/incubator-devlake/core/plugin"
        helper "github.com/apache/incubator-devlake/helpers/pluginhelper/api"
+       "github.com/apache/incubator-devlake/plugins/gitextractor/parser"
        "github.com/apache/incubator-devlake/plugins/gitextractor/tasks"
 )
 
@@ -58,21 +59,41 @@ func (p GitExtractor) SubTaskMetas() []plugin.SubTaskMeta {
 
 // PrepareTaskData based on task context and user input options, return data 
that shared among all subtasks
 func (p GitExtractor) PrepareTaskData(taskCtx plugin.TaskContext, options 
map[string]interface{}) (interface{}, errors.Error) {
-       var op tasks.GitExtractorOptions
+       log := taskCtx.GetLogger().Nested("gitextractor.PrepareTaskData")
+       var op parser.GitExtractorOptions
        if err := helper.Decode(options, &op, nil); err != nil {
                return nil, err
        }
        if err := op.Valid(); err != nil {
                return nil, err
        }
-       taskData := &tasks.GitExtractorTaskData{
+
+       // commit stat, especially commit files(part of stat) are expensive to 
collect, so we skip them by default
+       cfg := taskCtx.GetConfigReader()
+       loadBool := func(optValue **bool, key string, defValue bool) {
+               // if user specified the option, use it
+               if *optValue != nil {
+                       return
+               }
+               // or fallback to .env configuration
+               if cfg.IsSet(key) {
+                       defValue = cfg.GetBool(key)
+               }
+               *optValue = &defValue
+       }
+       loadBool(&op.SkipCommitStat, "SKIP_COMMIT_STAT", true)
+       loadBool(&op.SkipCommitFiles, "SKIP_COMMIT_FILES", true)
+       log.Info("SkipCommitStat: %v", *op.SkipCommitStat)
+       log.Info("SkipCommitFiles: %v", *op.SkipCommitFiles)
+
+       taskData := &parser.GitExtractorTaskData{
                Options: &op,
        }
        return taskData, nil
 }
 
 func (p GitExtractor) Close(taskCtx plugin.TaskContext) errors.Error {
-       if taskData, ok := taskCtx.GetData().(*tasks.GitExtractorTaskData); ok {
+       if taskData, ok := taskCtx.GetData().(*parser.GitExtractorTaskData); ok 
{
                if taskData.GitRepo != nil {
                        if err := taskData.GitRepo.Close(taskCtx.GetContext()); 
err != nil {
                                return errors.Convert(err)
diff --git a/backend/plugins/gitextractor/main.go b/backend/plugins/gitextractor/main.go
deleted file mode 100644
index f9cc30490..000000000
--- a/backend/plugins/gitextractor/main.go
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
-Licensed to the Apache Software Foundation (ASF) under one or more
-contributor license agreements.  See the NOTICE file distributed with
-this work for additional information regarding copyright ownership.
-The ASF licenses this file to You under the Apache License, Version 2.0
-(the "License"); you may not use this file except in compliance with
-the License.  You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package main
-
-import (
-       "context"
-       "flag"
-       "github.com/apache/incubator-devlake/core/config"
-       "github.com/apache/incubator-devlake/core/runner"
-       contextimpl "github.com/apache/incubator-devlake/impls/context"
-       "github.com/apache/incubator-devlake/impls/dalgorm"
-       "github.com/apache/incubator-devlake/impls/logruslog"
-       "github.com/apache/incubator-devlake/plugins/gitextractor/impl"
-       "github.com/apache/incubator-devlake/plugins/gitextractor/models"
-       "github.com/apache/incubator-devlake/plugins/gitextractor/store"
-       "github.com/apache/incubator-devlake/plugins/gitextractor/tasks"
-)
-
-// PluginEntry is a variable exported for Framework to search and load
-var PluginEntry impl.GitExtractor //nolint
-
-func main() {
-       url := flag.String("url", "", "-url")
-       proxy := flag.String("proxy", "", "-proxy")
-       id := flag.String("id", "", "-id")
-       user := flag.String("user", "", "-user")
-       password := flag.String("password", "", "-password")
-       output := flag.String("output", "", "-output")
-       dbUrl := flag.String("db", "", "-db")
-       flag.Parse()
-       cfg := config.GetConfig()
-       logger := logruslog.Global.Nested("git extractor")
-       var storage models.Store
-       var err error
-       if *url == "" {
-               panic("url is missing")
-       }
-       if *id == "" {
-               panic("id is missing")
-       }
-       db, err := runner.NewGormDb(cfg, logger)
-       if err != nil {
-               panic(err)
-       }
-       basicRes := contextimpl.NewDefaultBasicRes(cfg, logger, 
dalgorm.NewDalgorm(db))
-       if *output != "" {
-               storage, err = store.NewCsvStore(*output)
-               if err != nil {
-                       panic(err)
-               }
-       } else if *dbUrl != "" {
-               cfg.Set("DB_URL", *dbUrl)
-       }
-       // If we didn't specify output or dburl, we will use db by default
-       if storage == nil {
-               storage = store.NewDatabase(basicRes, *id)
-       }
-       defer storage.Close()
-       ctx := context.Background()
-       subTaskCtx := contextimpl.NewStandaloneSubTaskContext(
-               ctx,
-               basicRes,
-               "git extractor",
-               nil,
-       )
-       repo, err := tasks.NewGitRepo(subTaskCtx, logger, storage, 
&tasks.GitExtractorOptions{
-               RepoId:   *id,
-               Url:      *url,
-               User:     *user,
-               Password: *password,
-               Proxy:    *proxy,
-       })
-       if err != nil {
-               panic(err)
-       }
-       defer repo.Close(ctx)
-       if err = repo.CollectAll(subTaskCtx); err != nil {
-               panic(err)
-       }
-}
diff --git a/backend/plugins/gitextractor/parser/repo_gogit.go b/backend/plugins/gitextractor/parser/repo_gogit.go
index 82be9b365..b12c87c43 100644
--- a/backend/plugins/gitextractor/parser/repo_gogit.go
+++ b/backend/plugins/gitextractor/parser/repo_gogit.go
@@ -22,6 +22,8 @@ import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
+       "regexp"
+
        "github.com/apache/incubator-devlake/core/dal"
        "github.com/apache/incubator-devlake/core/errors"
        "github.com/apache/incubator-devlake/core/log"
@@ -33,7 +35,6 @@ import (
        "github.com/go-git/go-git/v5/plumbing"
        "github.com/go-git/go-git/v5/plumbing/object"
        "github.com/go-git/go-git/v5/plumbing/storer"
-       "regexp"
 )
 
 type GoGitRepo struct {
@@ -268,13 +269,13 @@ func (r *GoGitRepo) getComponentMap(subtaskCtx 
plugin.SubTaskContext) (map[strin
 
 // CollectCommits Collect data from each commit, we can also get the diff line
 func (r *GoGitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) (err 
error) {
+       taskOpts := subtaskCtx.GetData().(*GitExtractorTaskData).Options
        // check it first
        componentMap, err := r.getComponentMap(subtaskCtx)
        if err != nil {
                return err
        }
 
-       skipCommitFiles := subtaskCtx.GetConfigReader().GetBool(SkipCommitFiles)
        repo := r.repo
        store := r.store
 
@@ -306,19 +307,21 @@ func (r *GoGitRepo) CollectCommits(subtaskCtx 
plugin.SubTaskContext) (err error)
                        return err
                }
 
-               stats, err := commit.StatsContext(subtaskCtx.GetContext())
-               if err != nil {
-                       return err
-               } else {
-                       for _, stat := range stats {
-                               codeCommit.Additions += stat.Addition
-                               // In some repos, deletion may be zero, which 
is different from git log --stat.
-                               // It seems go-git doesn't get the correct 
changes.
-                               // I have run object.DiffTreeWithOptions 
manually with different diff algorithms,
-                               // but get the same result with StatsContext.
-                               // I cannot reproduce it with another repo.
-                               // A similar issue: 
https://github.com/go-git/go-git/issues/367
-                               codeCommit.Deletions += stat.Deletion
+               if !*taskOpts.SkipCommitStat {
+                       stats, err := 
commit.StatsContext(subtaskCtx.GetContext())
+                       if err != nil {
+                               return err
+                       } else {
+                               for _, stat := range stats {
+                                       codeCommit.Additions += stat.Addition
+                                       // In some repos, deletion may be zero, 
which is different from git log --stat.
+                                       // It seems go-git doesn't get the 
correct changes.
+                                       // I have run 
object.DiffTreeWithOptions manually with different diff algorithms,
+                                       // but get the same result with 
StatsContext.
+                                       // I cannot reproduce it with another 
repo.
+                                       // A similar issue: 
https://github.com/go-git/go-git/issues/367
+                                       codeCommit.Deletions += stat.Deletion
+                               }
                        }
                }
 
@@ -335,7 +338,7 @@ func (r *GoGitRepo) CollectCommits(subtaskCtx 
plugin.SubTaskContext) (err error)
                if err != nil {
                        return err
                }
-               if !skipCommitFiles {
+               if !*taskOpts.SkipCommitFiles {
                        if err := 
r.storeDiffCommitFilesComparedToParent(subtaskCtx, componentMap, commit); err 
!= nil {
                                return err
                        }
diff --git a/backend/plugins/gitextractor/parser/repo_libgit2.go b/backend/plugins/gitextractor/parser/repo_libgit2.go
index c05c2bfcc..2bf2d6037 100644
--- a/backend/plugins/gitextractor/parser/repo_libgit2.go
+++ b/backend/plugins/gitextractor/parser/repo_libgit2.go
@@ -26,7 +26,6 @@ import (
        "sort"
        "strconv"
 
-       "github.com/apache/incubator-devlake/core/config"
        "github.com/apache/incubator-devlake/core/dal"
        "github.com/apache/incubator-devlake/core/errors"
        "github.com/apache/incubator-devlake/core/log"
@@ -38,8 +37,6 @@ import (
        git "github.com/libgit2/git2go/v33"
 )
 
-const SkipCommitFiles = "SKIP_COMMIT_FILES"
-
 var TypeNotMatchError = "the requested type does not match the type in the ODB"
 
 type GitRepo struct {
@@ -231,6 +228,7 @@ func (r *GitRepo) CollectBranches(subtaskCtx 
plugin.SubTaskContext) error {
 
 // CollectCommits Collect data from each commit, we can also get the diff line
 func (r *GitRepo) CollectCommits(subtaskCtx plugin.SubTaskContext) error {
+       taskOpts := subtaskCtx.GetData().(*GitExtractorTaskData).Options
        opts, err := getDiffOpts()
        if err != nil {
                return err
@@ -290,13 +288,16 @@ func (r *GitRepo) CollectCommits(subtaskCtx 
plugin.SubTaskContext) error {
                if commit.ParentCount() > 0 {
                        parent = commit.Parent(0)
                }
-               var stats *git.DiffStats
-               if stats, err = r.getDiffComparedToParent(c.Sha, commit, 
parent, opts, componentMap); err != nil {
-                       return err
+
+               if !*taskOpts.SkipCommitStat {
+                       var stats *git.DiffStats
+                       if stats, err = r.getDiffComparedToParent(taskOpts, 
c.Sha, commit, parent, opts, componentMap); err != nil {
+                               return err
+                       }
+                       r.logger.Debug("state: %#+v\n", stats.Deletions())
+                       c.Additions += stats.Insertions()
+                       c.Deletions += stats.Deletions()
                }
-               r.logger.Debug("state: %#+v\n", stats.Deletions())
-               c.Additions += stats.Insertions()
-               c.Deletions += stats.Deletions()
 
                err = r.store.Commits(c)
                if err != nil {
@@ -331,7 +332,7 @@ func (r *GitRepo) storeParentCommits(commitSha string, 
commit *git.Commit) error
        return r.store.CommitParents(commitParents)
 }
 
-func (r *GitRepo) getDiffComparedToParent(commitSha string, commit 
*git.Commit, parent *git.Commit, opts *git.DiffOptions, componentMap 
map[string]*regexp.Regexp) (*git.DiffStats, errors.Error) {
+func (r *GitRepo) getDiffComparedToParent(taskOpts *GitExtractorOptions, 
commitSha string, commit *git.Commit, parent *git.Commit, opts 
*git.DiffOptions, componentMap map[string]*regexp.Regexp) (*git.DiffStats, 
errors.Error) {
        var err error
        var parentTree, tree *git.Tree
        if parent != nil {
@@ -349,12 +350,7 @@ func (r *GitRepo) getDiffComparedToParent(commitSha 
string, commit *git.Commit,
        if err != nil {
                return nil, errors.Convert(err)
        }
-       cfg := config.GetConfig()
-       skipCommitFiles := true
-       if cfg.IsSet(SkipCommitFiles) {
-               skipCommitFiles = cfg.GetBool(SkipCommitFiles)
-       }
-       if !skipCommitFiles {
+       if !*taskOpts.SkipCommitFiles {
                err = r.storeCommitFilesFromDiff(commitSha, diff, componentMap)
                if err != nil {
                        return nil, errors.Convert(err)
diff --git a/backend/plugins/gitextractor/parser/taskdata.go 
b/backend/plugins/gitextractor/parser/taskdata.go
new file mode 100644
index 000000000..fcbdf20af
--- /dev/null
+++ b/backend/plugins/gitextractor/parser/taskdata.go
@@ -0,0 +1,57 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package parser
+
+import (
+       "strings"
+
+       "github.com/apache/incubator-devlake/core/errors"
+)
+
+type GitExtractorTaskData struct {
+       Options *GitExtractorOptions // decoded and validated plugin options
+       GitRepo RepoCollector        // repo collector; may be nil, the plugin's Close closes it when set
+}
+
+type GitExtractorOptions struct {
+       RepoId          string `json:"repoId" mapstructure:"repoId"`
+       Name            string `json:"name" mapstructure:"name"`
+       Url             string `json:"url" mapstructure:"url"`
+       User            string `json:"user" mapstructure:"user"`
+       Password        string `json:"password" mapstructure:"password"`
+       PrivateKey      string `json:"privateKey" mapstructure:"privateKey"`
+       Passphrase      string `json:"passphrase" mapstructure:"passphrase"`
+       Proxy           string `json:"proxy" mapstructure:"proxy"`
+       UseGoGit        bool   `json:"useGoGit" mapstructure:"useGoGit"`
+       SkipCommitStat  *bool  `json:"skipCommitStat" mapstructure:"skipCommitStat" comment:"skip all commit stat including added/deleted lines and commit files as well"` // nil means "not specified by the caller"
+       SkipCommitFiles *bool  `json:"skipCommitFiles" mapstructure:"skipCommitFiles"` // nil means "not specified by the caller"
+}
+
+// Valid rejects an empty repoId or url, and any url that is not http*, git@ (optionally ssh://-prefixed) or a path starting with "/".
+func (o GitExtractorOptions) Valid() errors.Error {
+       remote := strings.TrimPrefix(o.Url, "ssh://")
+       switch {
+       case o.RepoId == "":
+               return errors.BadInput.New("empty repoId")
+       case o.Url == "":
+               return errors.BadInput.New("empty url")
+       case strings.HasPrefix(o.Url, "http"), strings.HasPrefix(remote, "git@"), strings.HasPrefix(o.Url, "/"):
+               return nil
+       }
+       return errors.BadInput.New("wrong url")
+}
diff --git a/backend/plugins/gitextractor/tasks/clone.go b/backend/plugins/gitextractor/tasks/clone.go
index 599d4adb6..14feadbbe 100644
--- a/backend/plugins/gitextractor/tasks/clone.go
+++ b/backend/plugins/gitextractor/tasks/clone.go
@@ -41,7 +41,7 @@ var CloneGitRepoMeta = plugin.SubTaskMeta{
        ForceRunOnResume: true,
 }
 
-func useGoGit(subTaskCtx plugin.SubTaskContext, taskData 
*GitExtractorTaskData) bool {
+func useGoGit(subTaskCtx plugin.SubTaskContext, taskData 
*parser.GitExtractorTaskData) bool {
        if subTaskCtx.GetConfigReader().GetBool(useGoGitInGitExtractor) {
                return true
        }
@@ -52,7 +52,7 @@ func useGoGit(subTaskCtx plugin.SubTaskContext, taskData 
*GitExtractorTaskData)
 }
 
 func CloneGitRepo(subTaskCtx plugin.SubTaskContext) errors.Error {
-       taskData, ok := subTaskCtx.GetData().(*GitExtractorTaskData)
+       taskData, ok := subTaskCtx.GetData().(*parser.GitExtractorTaskData)
        if !ok {
                panic("git repo reference not found on context")
        }
@@ -77,7 +77,7 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext) 
errors.Error {
 }
 
 // NewGitRepo create and return a new parser git repo
-func NewGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage 
models.Store, op *GitExtractorOptions) (parser.RepoCollector, errors.Error) {
+func NewGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage 
models.Store, op *parser.GitExtractorOptions) (parser.RepoCollector, 
errors.Error) {
        var err errors.Error
        var repo parser.RepoCollector
        p := parser.NewGitRepoCreator(storage, logger)
@@ -94,7 +94,7 @@ func NewGitRepo(ctx plugin.SubTaskContext, logger log.Logger, 
storage models.Sto
 }
 
 // NewGoGitRepo create and return a new parser git repo with go-git
-func NewGoGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage 
models.Store, op *GitExtractorOptions) (parser.RepoCollector, errors.Error) {
+func NewGoGitRepo(ctx plugin.SubTaskContext, logger log.Logger, storage 
models.Store, op *parser.GitExtractorOptions) (parser.RepoCollector, 
errors.Error) {
        var err errors.Error
        var repo parser.RepoCollector
        p := parser.NewGitRepoCreator(storage, logger)
diff --git a/backend/plugins/gitextractor/tasks/git_repo_collector.go b/backend/plugins/gitextractor/tasks/git_repo_collector.go
index 00abf28cf..c8a287b36 100644
--- a/backend/plugins/gitextractor/tasks/git_repo_collector.go
+++ b/backend/plugins/gitextractor/tasks/git_repo_collector.go
@@ -18,44 +18,11 @@ limitations under the License.
 package tasks
 
 import (
-       "strings"
-
        "github.com/apache/incubator-devlake/core/errors"
        "github.com/apache/incubator-devlake/core/plugin"
        "github.com/apache/incubator-devlake/plugins/gitextractor/parser"
 )
 
-type GitExtractorTaskData struct {
-       Options *GitExtractorOptions
-       GitRepo parser.RepoCollector
-}
-
-type GitExtractorOptions struct {
-       RepoId     string `json:"repoId"`
-       Name       string `json:"name"`
-       Url        string `json:"url"`
-       User       string `json:"user"`
-       Password   string `json:"password"`
-       PrivateKey string `json:"privateKey"`
-       Passphrase string `json:"passphrase"`
-       Proxy      string `json:"proxy"`
-       UseGoGit   bool   `json:"use_go_git"`
-}
-
-func (o GitExtractorOptions) Valid() errors.Error {
-       if o.RepoId == "" {
-               return errors.BadInput.New("empty repoId")
-       }
-       if o.Url == "" {
-               return errors.BadInput.New("empty url")
-       }
-       url := strings.TrimPrefix(o.Url, "ssh://")
-       if !(strings.HasPrefix(o.Url, "http") || strings.HasPrefix(url, "git@") 
|| strings.HasPrefix(o.Url, "/")) {
-               return errors.BadInput.New("wrong url")
-       }
-       return nil
-}
-
 func CollectGitCommits(subTaskCtx plugin.SubTaskContext) errors.Error {
        repo := getGitRepo(subTaskCtx)
        if count, err := repo.CountCommits(subTaskCtx.GetContext()); err != nil 
{
@@ -105,7 +72,7 @@ func CollectGitDiffLines(subTaskCtx plugin.SubTaskContext) 
errors.Error {
 }
 
 func getGitRepo(subTaskCtx plugin.SubTaskContext) parser.RepoCollector {
-       taskData, ok := subTaskCtx.GetData().(*GitExtractorTaskData)
+       taskData, ok := subTaskCtx.GetData().(*parser.GitExtractorTaskData)
        if !ok {
                panic("git repo reference not found on context")
        }

Reply via email to