pboling commented on code in PR #205: URL: https://github.com/apache/skywalking-eyes/pull/205#discussion_r2338888137
########## pkg/deps/ruby.go: ########## @@ -0,0 +1,415 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package deps + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" +) + +// GemfileLockResolver resolves Ruby dependencies from Gemfile.lock +// It determines project type by the presence of a *.gemspec file in the same directory as Gemfile.lock. +// - Library projects (with gemspec): ignore development dependencies; include only runtime deps and their transitive closure. +// - App projects (no gemspec): include all dependencies in Gemfile.lock. +// Licenses are fetched from RubyGems API unless overridden by user config. +// See issue description for detailed rules. 
+ +type GemfileLockResolver struct { + Resolver +} + +func (r *GemfileLockResolver) CanResolve(file string) bool { + base := filepath.Base(file) + return base == "Gemfile.lock" +} + +func (r *GemfileLockResolver) Resolve(lockfile string, config *ConfigDeps, report *Report) error { + dir := filepath.Dir(lockfile) + + content, err := os.ReadFile(lockfile) + if err != nil { + return err + } + + // Parse lockfile into specs graph and top-level dependencies + specs, deps, err := parseGemfileLock(string(content)) + if err != nil { + return err + } + + isLibrary := hasGemspec(dir) + + var roots []string + if isLibrary { + // Extract runtime dependencies from gemspec(s) + runtimeRoots, err := runtimeDepsFromGemspecs(dir) + if err != nil { + return err + } + if len(runtimeRoots) == 0 { + // Fallback: if not found, use DEPENDENCIES from lockfile + roots = deps + } else { + roots = runtimeRoots + } + } else { + // App: all declared dependencies are relevant + roots = deps + } + + // Compute the set of included gems + include := reachable(specs, roots) + // For app without explicit deps (rare), include all specs + if len(roots) == 0 { + for name := range specs { + include[name] = struct{}{} + } + } + + // Resolve licenses for included gems + for name := range include { + version := specs[name].Version + if exclude, _ := config.IsExcluded(name, version); exclude { + continue + } + if l, ok := config.GetUserConfiguredLicense(name, version); ok { + report.Resolve(&Result{Dependency: name, LicenseSpdxID: l, Version: version}) + continue + } + + licenseID, err := fetchRubyGemsLicense(name, version) + if err != nil || licenseID == "" { + report.Skip(&Result{Dependency: name, LicenseSpdxID: Unknown, Version: version}) + continue + } + report.Resolve(&Result{Dependency: name, LicenseSpdxID: licenseID, Version: version}) + } + + return nil +} + +// -------- Parsing Gemfile.lock -------- + +type gemSpec struct { + Name string + Version string + Deps []string +} + +type gemGraph 
map[string]*gemSpec + +var ( + lockSpecHeader = regexp.MustCompile(`^\s{4}([a-zA-Z0-9_\-]+) \(([^)]+)\)`) // rake (13.0.6) + lockDepLine = regexp.MustCompile(`^\s{6}([a-zA-Z0-9_\-]+)(?:\s|$)`) // activesupport (~> 6.1) +) + +func parseGemfileLock(s string) (graph gemGraph, roots []string, err error) { + scanner := bufio.NewScanner(strings.NewReader(s)) + scanner.Split(bufio.ScanLines) + graph = make(gemGraph) + + inSpecs := false + inDeps := false + var current *gemSpec + + for scanner.Scan() { + line := scanner.Text() + if strings.HasPrefix(line, "GEM") { + inSpecs = true + inDeps = false + current = nil + continue + } + if strings.HasPrefix(line, "DEPENDENCIES") { + inSpecs = false + inDeps = true + current = nil + continue + } + if strings.TrimSpace(line) == "specs:" && inSpecs { + // just a marker + continue + } + + if inSpecs { + if m := lockSpecHeader.FindStringSubmatch(line); len(m) == 3 { + name := m[1] + version := m[2] + current = &gemSpec{Name: name, Version: version} + graph[name] = current + continue + } + if current != nil { + if m := lockDepLine.FindStringSubmatch(line); len(m) == 2 { + depName := m[1] + current.Deps = append(current.Deps, depName) + } + } + continue + } + + if inDeps { + trim := strings.TrimSpace(line) + if trim == "" || strings.HasPrefix(trim, "BUNDLED WITH") { + inDeps = false + continue + } + // dependency line: byebug (~> 11.1) + root := trim + if i := strings.Index(root, " "); i >= 0 { + root = root[:i] + } + // ignore comments and platforms + if root != "" && !strings.HasPrefix(root, "#") { + roots = append(roots, root) + } + continue + } + } + if err := scanner.Err(); err != nil { + return nil, nil, err + } + return graph, roots, nil +} + +func hasGemspec(dir string) bool { + entries, err := os.ReadDir(dir) + if err != nil { + return false + } + for _, e := range entries { + if !e.IsDir() && strings.HasSuffix(e.Name(), ".gemspec") { + return true + } + } + return false +} + +var gemspecRuntimeRe = 
regexp.MustCompile(`(?m)\badd_(?:runtime_)?dependency\s*\(?\s*["']([^"']+)["']`) + +func runtimeDepsFromGemspecs(dir string) ([]string, error) { + entries, err := os.ReadDir(dir) + if err != nil { + return nil, err + } + runtime := make(map[string]struct{}) + for _, e := range entries { + if e.IsDir() || !strings.HasSuffix(e.Name(), ".gemspec") { + continue + } + b, err := os.ReadFile(filepath.Join(dir, e.Name())) + if err != nil { + return nil, err + } + for _, m := range gemspecRuntimeRe.FindAllStringSubmatch(string(b), -1) { + if len(m) == 2 { + runtime[m[1]] = struct{}{} + } + } + } + res := make([]string, 0, len(runtime)) + for k := range runtime { + res = append(res, k) + } + return res, nil +} + +func reachable(graph gemGraph, roots []string) map[string]struct{} { + vis := make(map[string]struct{}) + var dfs func(string) + dfs = func(n string) { + if _, ok := vis[n]; ok { + return + } + if _, ok := graph[n]; !ok { + // unknown in specs, still include the root + vis[n] = struct{}{} + return + } + vis[n] = struct{}{} + for _, c := range graph[n].Deps { + dfs(c) + } + } + for _, r := range roots { + dfs(r) + } + return vis +} + +// -------- License resolution via RubyGems API -------- + +type rubyGemsVersionInfo struct { + Licenses []string `json:"licenses"` + License string `json:"license"` +} + +func fetchRubyGemsLicense(name, version string) (string, error) { + // Prefer version-specific API + url := fmt.Sprintf("https://rubygems.org/api/v2/rubygems/%s/versions/%s.json", name, version) + licenseID, err := fetchRubyGemsLicenseFrom(url) + if err == nil && licenseID != "" { + return licenseID, nil + } + // Fallback to latest info + url = fmt.Sprintf("https://rubygems.org/api/v1/gems/%s.json", name) Review Comment: > Does gem package manager downloads the files to a local location like other package manager, if so we can read from the local file system. Yes, if the GHA workflow is setup with Ruby, and runs the `bundle install` to install all the dependencies. 
We could check them that way in most scenarios, but: - On some systems the gem package is split when it is installed, e.g., the gemspec may not exist at the project root, and, relevant to this use case, the split would make the installed dependencies more difficult to parse/walk through than the project's raw Gemfile.lock. - Downloading entire dependency packages and running the complete version-solving routine, which requires downloading the entire gem index, may actually be a significantly heavier load than API calls for each dependency. - This would also require that the project be in a good working state. Parsing agnostically like this doesn't require the project to be in any kind of serviceable state. > Also, do we need to change the url to the one set in the Gemfile.lock? If users want to use they own registry? It is possible to use other registries... but - if they do, they are usually private - accessing private ones is far more complex - often involves an API token set in the ENV - which should never be in version control, and has no uniform naming scheme - these are edge cases - I would suggest that support for them be handled in further PRs by people who need that functionality (big corps can pay for things they need to use!) So, https://rubygems.org is the canonical default, and we could read it from the Gemfile, but it is a lot of complexity for a tiny fraction of users who are not paying me anything. I'm just trying to provide better than 80/20 functionality for the "rest of us", the open source maintainers. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@skywalking.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org