pboling commented on code in PR #205:
URL: https://github.com/apache/skywalking-eyes/pull/205#discussion_r2338888137


##########
pkg/deps/ruby.go:
##########
@@ -0,0 +1,415 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package deps
+
+import (
+       "bufio"
+       "encoding/json"
+       "fmt"
+       "io"
+       "net/http"
+       "os"
+       "path/filepath"
+       "regexp"
+       "strconv"
+       "strings"
+       "time"
+)
+
+// GemfileLockResolver resolves Ruby dependencies from Gemfile.lock
+// It determines project type by the presence of a *.gemspec file in the same 
directory as Gemfile.lock.
+// - Library projects (with gemspec): ignore development dependencies; include 
only runtime deps and their transitive closure.
+// - App projects (no gemspec): include all dependencies in Gemfile.lock.
+// Licenses are fetched from RubyGems API unless overridden by user config.
+// See issue description for detailed rules.
+
+type GemfileLockResolver struct {
+       Resolver
+}
+
+func (r *GemfileLockResolver) CanResolve(file string) bool {
+       base := filepath.Base(file)
+       return base == "Gemfile.lock"
+}
+
+func (r *GemfileLockResolver) Resolve(lockfile string, config *ConfigDeps, 
report *Report) error {
+       dir := filepath.Dir(lockfile)
+
+       content, err := os.ReadFile(lockfile)
+       if err != nil {
+               return err
+       }
+
+       // Parse lockfile into specs graph and top-level dependencies
+       specs, deps, err := parseGemfileLock(string(content))
+       if err != nil {
+               return err
+       }
+
+       isLibrary := hasGemspec(dir)
+
+       var roots []string
+       if isLibrary {
+               // Extract runtime dependencies from gemspec(s)
+               runtimeRoots, err := runtimeDepsFromGemspecs(dir)
+               if err != nil {
+                       return err
+               }
+               if len(runtimeRoots) == 0 {
+                       // Fallback: if not found, use DEPENDENCIES from 
lockfile
+                       roots = deps
+               } else {
+                       roots = runtimeRoots
+               }
+       } else {
+               // App: all declared dependencies are relevant
+               roots = deps
+       }
+
+       // Compute the set of included gems
+       include := reachable(specs, roots)
+       // For app without explicit deps (rare), include all specs
+       if len(roots) == 0 {
+               for name := range specs {
+                       include[name] = struct{}{}
+               }
+       }
+
+       // Resolve licenses for included gems
+       for name := range include {
+               version := specs[name].Version
+               if exclude, _ := config.IsExcluded(name, version); exclude {
+                       continue
+               }
+               if l, ok := config.GetUserConfiguredLicense(name, version); ok {
+                       report.Resolve(&Result{Dependency: name, LicenseSpdxID: 
l, Version: version})
+                       continue
+               }
+
+               licenseID, err := fetchRubyGemsLicense(name, version)
+               if err != nil || licenseID == "" {
+                       report.Skip(&Result{Dependency: name, LicenseSpdxID: 
Unknown, Version: version})
+                       continue
+               }
+               report.Resolve(&Result{Dependency: name, LicenseSpdxID: 
licenseID, Version: version})
+       }
+
+       return nil
+}
+
// -------- Parsing Gemfile.lock --------

// gemSpec is one resolved gem taken from a "specs:" list in the GEM section
// of a Gemfile.lock.
type gemSpec struct {
	Name    string   // gem name as written in the lockfile
	Version string   // text between the parentheses, e.g. "13.0.6" (may carry a platform suffix)
	Deps    []string // names of the gem's direct dependencies; version constraints are dropped
}

// gemGraph indexes the parsed specs by gem name.
type gemGraph map[string]*gemSpec

var (
	// lockSpecHeader matches a spec entry (four-column indent), e.g.
	// "    rake (13.0.6)". Gem names may contain letters, digits, '_', '-'
	// and '.'.
	lockSpecHeader = regexp.MustCompile(`^\s{4}([a-zA-Z0-9_.\-]+) \(([^)]+)\)`)
	// lockDepLine matches a dependency of the current spec (six-column
	// indent), e.g. "      activesupport (~> 6.1)".
	lockDepLine = regexp.MustCompile(`^\s{6}([a-zA-Z0-9_.\-]+)(?:\s|$)`)
)

// parseGemfileLock parses the text of a Gemfile.lock.
//
// It returns the graph of gems listed in the GEM section(s), keyed by name,
// and the names listed in the DEPENDENCIES section, in file order and without
// version constraints or the trailing "!" that Bundler appends to dependencies
// pinned to a non-default source. Specs in other sections (PATH, GIT, ...) are
// not added to the graph. The only error returned is a scanner failure.
func parseGemfileLock(s string) (graph gemGraph, roots []string, err error) {
	const (
		sectionOther = iota // any section this parser does not read
		sectionGem          // inside "GEM"
		sectionDeps         // inside "DEPENDENCIES"
	)

	graph = make(gemGraph)
	section := sectionOther
	var current *gemSpec // spec whose dependency lines are being collected

	scanner := bufio.NewScanner(strings.NewReader(s))
	for scanner.Scan() {
		line := scanner.Text()
		trimmed := strings.TrimSpace(line)

		if trimmed == "" {
			// A blank line closes the DEPENDENCIES list.
			if section == sectionDeps {
				section = sectionOther
			}
			continue
		}

		// Section headers are the only non-indented lines in a lockfile.
		// Switching on every header keeps GEM state from leaking into later
		// sections such as PLATFORMS.
		if line[0] != ' ' && line[0] != '\t' {
			switch {
			case strings.HasPrefix(line, "GEM"):
				section = sectionGem
			case strings.HasPrefix(line, "DEPENDENCIES"):
				section = sectionDeps
			default:
				section = sectionOther
			}
			current = nil
			continue
		}

		switch section {
		case sectionGem:
			if m := lockSpecHeader.FindStringSubmatch(line); m != nil {
				current = &gemSpec{Name: m[1], Version: m[2]}
				graph[m[1]] = current
				continue
			}
			// "remote:" and "specs:" lines match neither pattern and fall
			// through here without effect.
			if current == nil {
				continue
			}
			if m := lockDepLine.FindStringSubmatch(line); m != nil {
				current.Deps = append(current.Deps, m[1])
			}

		case sectionDeps:
			// Dependency line: "byebug (~> 11.1)" or "mygem!". trimmed is
			// non-empty here, so Fields yields at least one element.
			name := strings.TrimSuffix(strings.Fields(trimmed)[0], "!")
			// Ignore comments.
			if name != "" && !strings.HasPrefix(name, "#") {
				roots = append(roots, name)
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, nil, err
	}
	return graph, roots, nil
}
+
// hasGemspec reports whether dir directly contains at least one non-directory
// entry whose name ends in ".gemspec". A directory that cannot be read is
// treated as having none.
func hasGemspec(dir string) bool {
	listing, readErr := os.ReadDir(dir)
	if readErr != nil {
		return false
	}
	for i := range listing {
		entry := listing[i]
		if entry.IsDir() {
			continue
		}
		if strings.HasSuffix(entry.Name(), ".gemspec") {
			return true
		}
	}
	return false
}
+
// gemspecRuntimeRe captures the gem name from add_dependency and
// add_runtime_dependency calls, with or without parentheses and with either
// quote style. add_development_dependency does not match.
var gemspecRuntimeRe = regexp.MustCompile(`(?m)\badd_(?:runtime_)?dependency\s*\(?\s*["']([^"']+)["']`)

// runtimeDepsFromGemspecs scans every *.gemspec file directly inside dir and
// returns the names of the runtime dependencies they declare.
//
// Names are returned without duplicates in first-seen order (files in the
// order os.ReadDir lists them, declarations in file order), so the result is
// stable between runs. Lines that are entirely a Ruby "#" comment are ignored,
// so a commented-out dependency is not reported. The returned slice is
// non-nil even when nothing is found. An error is returned if the directory
// or one of the gemspec files cannot be read.
func runtimeDepsFromGemspecs(dir string) ([]string, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, err
	}
	seen := make(map[string]struct{})
	res := make([]string, 0)
	for _, e := range entries {
		if e.IsDir() || !strings.HasSuffix(e.Name(), ".gemspec") {
			continue
		}
		b, err := os.ReadFile(filepath.Join(dir, e.Name()))
		if err != nil {
			return nil, err
		}
		// Match against the whole (comment-stripped) text rather than line by
		// line so that calls split across lines are still recognised.
		src := stripRubyCommentLines(string(b))
		for _, m := range gemspecRuntimeRe.FindAllStringSubmatch(src, -1) {
			name := m[1]
			if _, dup := seen[name]; dup {
				continue
			}
			seen[name] = struct{}{}
			res = append(res, name)
		}
	}
	return res, nil
}

// stripRubyCommentLines removes every line of src whose first non-blank
// character is '#'. Trailing comments on code lines are left untouched.
func stripRubyCommentLines(src string) string {
	lines := strings.Split(src, "\n")
	kept := make([]string, 0, len(lines))
	for _, l := range lines {
		if strings.HasPrefix(strings.TrimSpace(l), "#") {
			continue
		}
		kept = append(kept, l)
	}
	return strings.Join(kept, "\n")
}
+
+func reachable(graph gemGraph, roots []string) map[string]struct{} {
+       vis := make(map[string]struct{})
+       var dfs func(string)
+       dfs = func(n string) {
+               if _, ok := vis[n]; ok {
+                       return
+               }
+               if _, ok := graph[n]; !ok {
+                       // unknown in specs, still include the root
+                       vis[n] = struct{}{}
+                       return
+               }
+               vis[n] = struct{}{}
+               for _, c := range graph[n].Deps {
+                       dfs(c)
+               }
+       }
+       for _, r := range roots {
+               dfs(r)
+       }
+       return vis
+}
+
+// -------- License resolution via RubyGems API --------
+
// rubyGemsVersionInfo holds the license-related fields of a JSON document;
// any other keys in the document are ignored when decoding into it.
// NOTE(review): presumably the target for RubyGems API responses — the
// decoding code is outside this view.
type rubyGemsVersionInfo struct {
	// Licenses is filled from the "licenses" JSON array.
	Licenses []string `json:"licenses"`
	// License is filled from the singular "license" JSON key.
	License string `json:"license"`
}
+
+func fetchRubyGemsLicense(name, version string) (string, error) {
+       // Prefer version-specific API
+       url := 
fmt.Sprintf("https://rubygems.org/api/v2/rubygems/%s/versions/%s.json";, name, 
version)
+       licenseID, err := fetchRubyGemsLicenseFrom(url)
+       if err == nil && licenseID != "" {
+               return licenseID, nil
+       }
+       // Fallback to latest info
+       url = fmt.Sprintf("https://rubygems.org/api/v1/gems/%s.json";, name)

Review Comment:
   > Does the gem package manager download the files to a local location like other package managers? If so, we can read from the local file system.
   
   Yes, if the GitHub Actions (GHA) workflow is set up with Ruby and runs `bundle install` to install all the dependencies. We could check them that way in most scenarios, but:
   
   - On some systems the gem package is split when it is installed, e.g., the 
gemspec may not exist at the project root, and with relevance to this use case, 
this would make the installed dependencies more difficult to parse through than 
the project's raw Gemfile.lock.
   
   - Downloading entire dependency packages and doing the complete version-solving routine, which requires downloading the entire gem index, may actually be a significantly heavier load than making one API call per dependency.
   
   > Also, do we need to change the URL to the one set in the Gemfile.lock, in case users want to use their own registry?
   
   It is possible that they can use other registries... but
   
   - usually they are private
   - accessing them is far more complex
     - often involves an API token set in the ENV
     - which should never be in version control, and has no uniform naming 
scheme
   - these are edge cases
   - I would suggest that support for them be handled in further PRs by people 
who need that functionality (big corps can pay for things they need to use!)
   
   So, https://rubygems.org is the canonical default, and we could read it from 
the Gemfile, but it is a lot of complexity for a tiny fraction of users who are 
not paying me anything.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@skywalking.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to