This is an automated email from the ASF dual-hosted git repository. warren pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
commit 29abfc1525c4b7181bc2067f5a8c1daec41be027 Author: tgp <[email protected]> AuthorDate: Mon May 23 11:39:38 2022 +0800 collector extractor convertor draft --- plugins/gitlab/models/issue.go | 32 +++++ plugins/gitlab/models/issue_label.go | 18 +++ plugins/gitlab/tasks/issue_collector.go | 126 +++++++++++++++++ plugins/gitlab/tasks/issue_convertor.go | 89 ++++++++++++ plugins/gitlab/tasks/issue_extractor.go | 192 ++++++++++++++++++++++++++ plugins/gitlab/tasks/issue_label_convertor.go | 62 +++++++++ 6 files changed, 519 insertions(+) diff --git a/plugins/gitlab/models/issue.go b/plugins/gitlab/models/issue.go new file mode 100644 index 00000000..0ebfd1cd --- /dev/null +++ b/plugins/gitlab/models/issue.go @@ -0,0 +1,32 @@ +package models + +import ( + "github.com/merico-dev/lake/models/common" + "time" +) + +type GitlabIssue struct { + GitlabId int `gorm:"primaryKey"` + ProjectId int `gorm:"index"` + Number int `gorm:"index;comment:Used in API requests ex. api/repo/1/issue/<THIS_NUMBER>"` + State string `gorm:"type:varchar(255)"` + Title string + Body string + Priority string `gorm:"type:varchar(255)"` + Type string `gorm:"type:varchar(100)"` + Status string `gorm:"type:varchar(255)"` + AssigneeId int + AssigneeName string `gorm:"type:varchar(255)"` + LeadTimeMinutes uint + Url string `gorm:"type:varchar(255)"` + ClosedAt *time.Time + GitlabCreatedAt time.Time + GitlabUpdatedAt time.Time `gorm:"index"` + Severity string `gorm:"type:varchar(255)"` + Component string `gorm:"type:varchar(255)"` + common.NoPKModel +} +func (GitlabIssue) TableName() string { + return "_tool_gitlab_issues" +} + diff --git a/plugins/gitlab/models/issue_label.go b/plugins/gitlab/models/issue_label.go new file mode 100644 index 00000000..54e0c2be --- /dev/null +++ b/plugins/gitlab/models/issue_label.go @@ -0,0 +1,18 @@ +package models + +import ( +"github.com/merico-dev/lake/models/common" +) + +// Please note that Issue Labels can also apply to Pull Requests. +// Pull Requests are considered Issues in GitHub. + +type GitlabIssueLabel struct { + IssueId int `gorm:"primaryKey;autoIncrement:false"` + LabelName string `gorm:"primaryKey;type:varchar(255)"` + common.NoPKModel +} + +func (GitlabIssueLabel) TableName() string{ + return "_tool_gitlab_issue_labels" +} diff --git a/plugins/gitlab/tasks/issue_collector.go b/plugins/gitlab/tasks/issue_collector.go new file mode 100644 index 00000000..e2b1c7af --- /dev/null +++ b/plugins/gitlab/tasks/issue_collector.go @@ -0,0 +1,126 @@ +package tasks + +import ( + "encoding/json" + "fmt" + "net/http" + "net/url" + + "github.com/merico-dev/lake/plugins/helper" + + "github.com/merico-dev/lake/plugins/core" + "github.com/merico-dev/lake/plugins/gitlab/models" +) + +const RAW_ISSUE_TABLE = "gitlab_api_issues" + +//This Struct was declared in shared.go +//type GitlabApiParams struct { +// ProjectId int +//} + +var CollectApiIssuesMeta = core.SubTaskMeta{ + Name: "collectApiIssues", + EntryPoint: CollectApiIssues, + EnabledByDefault: true, + Description: "Collect issues data from Gitlab api", +} + +func CollectApiIssues(taskCtx core.SubTaskContext) error { + db := taskCtx.GetDb() + data := taskCtx.GetData().(*GitlabTaskData) + + since := data.Since + incremental := false + // user didn't specify a time range to sync, try load from database + if since == nil { + var latestUpdated models.GitlabIssue + err := db.Model(&latestUpdated). + Where("project_id = ?", data.Options.ProjectId). + Order("gitlab_updated_at DESC").Limit(1).Find(&latestUpdated).Error + + if err != nil { + return fmt.Errorf("failed to get latest gitlab issue record: %w", err) + } + if latestUpdated.GitlabId > 0 { + since = &latestUpdated.GitlabUpdatedAt + incremental = true + } + } + + collector, err := helper.NewApiCollector(helper.ApiCollectorArgs{ + RawDataSubTaskArgs: helper.RawDataSubTaskArgs{ + Ctx: taskCtx, + /* + This struct will be JSONEncoded and stored into database along with raw data itself, to identity minimal + set of data to be process, for example, we process JiraIssues by Board + */ + Params: GitlabApiParams{ + ProjectId: data.Options.ProjectId, + }, + /* + Table store raw data + */ + Table: RAW_ISSUE_TABLE, + }, + ApiClient: data.ApiClient, + PageSize: 100, + Incremental: incremental, + /* + url may use arbitrary variables from different source in any order, we need GoTemplate to allow more + flexible for all kinds of possibility. + Pager contains information for a particular page, calculated by ApiCollector, and will be passed into + GoTemplate to generate a url for that page. + We want to do page-fetching in ApiCollector, because the logic are highly similar, by doing so, we can + avoid duplicate logic for every tasks, and when we have a better idea like improving performance, we can + do it in one place + */ + UrlTemplate: "projects/{{ .Params.ProjectId }}/issues", + /* + (Optional) Return query string for request, or you can plug them into UrlTemplate directly + */ + Query: func(reqData *helper.RequestData) (url.Values, error) { + query := url.Values{} + if since != nil { + query.Set("created_after", since.String()) + } + query.Set("sort", "asc") + query.Set("page", fmt.Sprintf("%v", reqData.Pager.Page)) + query.Set("per_page", fmt.Sprintf("%v", reqData.Pager.Size)) + + return query, nil + }, + /* + Some api might do pagination by http headers + */ + //Header: func(pager *core.Pager) http.Header { + //}, + /* + Sometimes, we need to collect data based on previous collected data, like jira changelog, it requires + issue_id as part of the url. + We can mimic `stdin` design, to accept a `Input` function which produces a `Iterator`, collector + should iterate all records, and do data-fetching for each on, either in parallel or sequential order + UrlTemplate: "api/3/issue/{{ Input.ID }}/changelog" + */ + //Input: databaseIssuesIterator, + /* + For api endpoint that returns number of total pages, ApiCollector can collect pages in parallel with ease, + or other techniques are required if this information was missing. + */ + GetTotalPages: GetTotalPagesFromResponse, + ResponseParser: func(res *http.Response) ([]json.RawMessage, error) { + var items []json.RawMessage + err := helper.UnmarshalResponse(res, &items) + if err != nil { + return nil, err + } + return items, nil + }, + }) + + if err != nil { + return err + } + + return collector.Execute() +} diff --git a/plugins/gitlab/tasks/issue_convertor.go b/plugins/gitlab/tasks/issue_convertor.go new file mode 100644 index 00000000..c0f663a6 --- /dev/null +++ b/plugins/gitlab/tasks/issue_convertor.go @@ -0,0 +1,89 @@ +package tasks + +import ( + "reflect" + "strconv" + + "github.com/merico-dev/lake/plugins/core" + "github.com/merico-dev/lake/plugins/helper" + + "github.com/merico-dev/lake/models/domainlayer" + "github.com/merico-dev/lake/models/domainlayer/didgen" + "github.com/merico-dev/lake/models/domainlayer/ticket" + gitlabModels "github.com/merico-dev/lake/plugins/gitlab/models" +) + +var ConvertIssuesMeta = core.SubTaskMeta{ + Name: "convertIssues", + EntryPoint: ConvertIssues, + EnabledByDefault: true, + Description: "Convert tool layer table gitlab_issues into domain layer table issues", +} + +func ConvertIssues(taskCtx core.SubTaskContext) error { + db := taskCtx.GetDb() + data := taskCtx.GetData().(*GitlabTaskData) + projectId := data.Options.ProjectId + + issue := &gitlabModels.GitlabIssue{} + cursor, err := db.Model(issue).Where("project_id = ?", projectId).Rows() + + if err != nil { + return err + } + defer cursor.Close() + + issueIdGen := didgen.NewDomainIdGenerator(&gitlabModels.GitlabIssue{}) + userIdGen := didgen.NewDomainIdGenerator(&gitlabModels.GitlabUser{}) + boardIdGen := didgen.NewDomainIdGenerator(&gitlabModels.GitlabProject{}) + + converter, err := helper.NewDataConverter(helper.DataConverterArgs{ + RawDataSubTaskArgs: helper.RawDataSubTaskArgs{ + Ctx: taskCtx, + Params: GitlabApiParams{ + ProjectId: data.Options.ProjectId, + }, + Table: RAW_ISSUE_TABLE, + }, + InputRowType: reflect.TypeOf(gitlabModels.GitlabIssue{}), + Input: cursor, + Convert: func(inputRow interface{}) ([]interface{}, error) { + issue := inputRow.(*gitlabModels.GitlabIssue) + domainIssue := &ticket.Issue{ + DomainEntity: domainlayer.DomainEntity{Id: issueIdGen.Generate(issue.GitlabId)}, + Number: strconv.Itoa(issue.Number), + Title: issue.Title, + Description: issue.Body, + Priority: issue.Priority, + Type: issue.Type, + AssigneeId: userIdGen.Generate(issue.AssigneeId), + AssigneeName: issue.AssigneeName, + LeadTimeMinutes: issue.LeadTimeMinutes, + Url: issue.Url, + CreatedDate: &issue.GitlabCreatedAt, + UpdatedDate: &issue.GitlabUpdatedAt, + ResolutionDate: issue.ClosedAt, + Severity: issue.Severity, + Component: issue.Component, + } + if issue.State == "closed" { + domainIssue.Status = ticket.DONE + } else { + domainIssue.Status = ticket.TODO + } + boardIssue := &ticket.BoardIssue{ + BoardId: boardIdGen.Generate(projectId), + IssueId: domainIssue.Id, + } + return []interface{}{ + domainIssue, + boardIssue, + }, nil + }, + }) + if err != nil { + return err + } + + return converter.Execute() +} diff --git a/plugins/gitlab/tasks/issue_extractor.go b/plugins/gitlab/tasks/issue_extractor.go new file mode 100644 index 00000000..80fc920a --- /dev/null +++ b/plugins/gitlab/tasks/issue_extractor.go @@ -0,0 +1,192 @@ +package tasks + +import ( + "encoding/json" + "github.com/merico-dev/lake/plugins/core" + "github.com/merico-dev/lake/plugins/gitlab/models" + "github.com/merico-dev/lake/plugins/helper" +) + +var ExtractApiIssuesMeta = core.SubTaskMeta{ + Name: "extractApiIssues", + EntryPoint: ExtractApiIssues, + EnabledByDefault: true, + Description: "Extract raw Issues data into tool layer table gitlab_issues", +} + +type IssuesResponse struct { + ProjectId int `json:"id"` + Milestone struct { + Due_date string + Project_id int + State string + Description string + Iid int + Id int + Title string + CreatedAt *core.Iso8601Time + UpdatedAt *core.Iso8601Time + } + Author struct{ + State string + WebUrl string + AvatarUrl string + Username string + Id int + Name string + } + Description string + State string + Iid int + Assignees []struct { + AvatarUrl string + WebUrl string + State string + Username string + Id int + Name string + } + Assignee *struct { + AvatarUrl string + WebUrl string + State string + Username string + Id int + Name string + } + Type string + Labels []string `json:"labels"` + UpVotes int + DownVotes int + MergeRequestsCount int + Id int + Title string + GitlabUpdatedAt core.Iso8601Time `json:"updated_at"` + GitlabCreatedAt core.Iso8601Time `json:"created_at"` + GitlabClosedAt *core.Iso8601Time `json:"closed_at"` + ClosedBy struct{ + State string + WebUrl string + AvatarUrl string + Username string + Id int + Name string + } + UserNotesCount int + DueDate *core.Iso8601Time + WebUrl string `json:"web_url"` + References struct { + Short string + Relative string + Full string + } + TimeStats struct { + TimeEstimate int + TotalTimeSpent int + HumanTimeEstimate string + HumanTotalTimeSpent string + } + HasTasks bool + TaskStatus string + Confidential bool + DiscussionLocked bool + IssueType string + Serverity string + Links struct { + Self string `json:"url"` + Notes string + AwardEmoji string + Project string + } + TaskCompletionStatus struct { + Count int + CompletedCount int + } + +} + +func ExtractApiIssues(taskCtx core.SubTaskContext) error { + data := taskCtx.GetData().(*GitlabTaskData) + + extractor, err := helper.NewApiExtractor(helper.ApiExtractorArgs{ + RawDataSubTaskArgs: helper.RawDataSubTaskArgs{ + Ctx: taskCtx, + /* + This struct will be JSONEncoded and stored into database along with raw data itself, to identity minimal + set of data to be process, for example, we process JiraIssues by Board + */ + Params: GitlabApiParams{ + ProjectId: data.Options.ProjectId, + }, + /* + Table store raw data + */ + Table: RAW_ISSUE_TABLE, + }, + Extract: func(row *helper.RawData) ([]interface{}, error) { + body := &IssuesResponse{} + err := json.Unmarshal(row.Data, body) + if err != nil { + return nil, err + } + // need to extract 2 kinds of entities here + if body.ProjectId == 0 { + return nil, nil + } + ////If this is a pr, ignore + //if body.PullRequest.Url != "" { + // return nil, nil + //} + //If this is not Issue, ignore + if body.IssueType != "ISSUE" { + return nil, nil + } + results := make([]interface{}, 0, 2) + gitlabIssue, err := convertGitlabIssue(body, data.Options.ProjectId) + if err != nil { + return nil, err + } + + for _, label := range body.Labels { + results = append(results, &models.GitlabIssueLabel{ + IssueId: gitlabIssue.GitlabId, + LabelName: label, + }) + + } + results = append(results, gitlabIssue) + + return results, nil + }, + }) + + if err != nil { + return err + } + + return extractor.Execute() +} +func convertGitlabIssue(issue *IssuesResponse, projectId int) (*models.GitlabIssue, error) { + gitlabIssue := &models.GitlabIssue{ + GitlabId: issue.Id, + ProjectId: projectId, + Number: issue.Iid, + State: issue.State, + Title: issue.Title, + Body: issue.Description, + Url: issue.Links.Self, + ClosedAt: core.Iso8601TimeToTime(issue.GitlabClosedAt), + GitlabCreatedAt: issue.GitlabCreatedAt.ToTime(), + GitlabUpdatedAt: issue.GitlabUpdatedAt.ToTime(), + } + + if issue.Assignee != nil { + gitlabIssue.AssigneeId = issue.Assignee.Id + gitlabIssue.AssigneeName = issue.Assignee.Username + } + if issue.GitlabClosedAt != nil { + gitlabIssue.LeadTimeMinutes = uint(issue.GitlabClosedAt.ToTime().Sub(issue.GitlabCreatedAt.ToTime()).Minutes()) + } + + return gitlabIssue, nil +} diff --git a/plugins/gitlab/tasks/issue_label_convertor.go b/plugins/gitlab/tasks/issue_label_convertor.go new file mode 100644 index 00000000..2bbee876 --- /dev/null +++ b/plugins/gitlab/tasks/issue_label_convertor.go @@ -0,0 +1,62 @@ +package tasks + +import ( + "reflect" + + "github.com/merico-dev/lake/models/domainlayer/didgen" + "github.com/merico-dev/lake/models/domainlayer/ticket" + "github.com/merico-dev/lake/plugins/core" + gitlabModels "github.com/merico-dev/lake/plugins/gitlab/models" + "github.com/merico-dev/lake/plugins/helper" +) + +var ConvertIssueLabelsMeta = core.SubTaskMeta{ + Name: "convertIssueLabels", + EntryPoint: ConvertIssueLabels, + EnabledByDefault: true, + Description: "Convert tool layer table gitlab_issue_labels into domain layer table issue_labels", +} + +func ConvertIssueLabels(taskCtx core.SubTaskContext) error { + db := taskCtx.GetDb() + data := taskCtx.GetData().(*GitlabTaskData) + projectId := data.Options.ProjectId + + cursor, err := db.Model(&gitlabModels.GitlabIssueLabel{}). + Joins(`left join _tool_gitlab_issues on _tool_gitlab_issues.gitlab_id = _tool_gitlab_issue_labels.issue_id`). + Where("_tool_gitlab_issues.project_id = ?", projectId). + Order("issue_id ASC"). + Rows() + if err != nil { + return err + } + defer cursor.Close() + issueIdGen := didgen.NewDomainIdGenerator(&gitlabModels.GitlabIssue{}) + + converter, err := helper.NewDataConverter(helper.DataConverterArgs{ + RawDataSubTaskArgs: helper.RawDataSubTaskArgs{ + Ctx: taskCtx, + Params: GitlabApiParams{ + ProjectId: projectId, + }, + Table: RAW_ISSUE_TABLE, + }, + InputRowType: reflect.TypeOf(gitlabModels.GitlabIssueLabel{}), + Input: cursor, + Convert: func(inputRow interface{}) ([]interface{}, error) { + issueLabel := inputRow.(*gitlabModels.GitlabIssueLabel) + domainIssueLabel := &ticket.IssueLabel{ + IssueId: issueIdGen.Generate(issueLabel.IssueId), + LabelName: issueLabel.LabelName, + } + return []interface{}{ + domainIssueLabel, + }, nil + }, + }) + if err != nil { + return err + } + + return converter.Execute() +}
