This is an automated email from the ASF dual-hosted git repository. hoshea pushed a commit to branch threshold-config in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git
commit 220cad0cc8f919142133f2bff41722758576767d Author: Hoshea <[email protected]> AuthorDate: Sat May 21 21:42:28 2022 +0800 make threshold configurable --- README.md | 2 ++ pkg/deps/config.go | 14 ++++++++++++-- pkg/deps/golang.go | 14 +++++++------- pkg/deps/jar.go | 12 ++++++------ pkg/deps/jar_test.go | 6 +++++- pkg/deps/maven.go | 20 ++++++++++---------- pkg/deps/npm.go | 16 ++++++++-------- pkg/deps/resolve.go | 4 ++-- pkg/license/identifier.go | 12 +++--------- pkg/license/identifier_test.go | 6 ++++-- 10 files changed, 59 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 1a966a7..49dd4cb 100644 --- a/README.md +++ b/README.md @@ -768,6 +768,7 @@ dependency: # <15> - name: dependency-name # <18> version: dependency-version # <19> license: Apache-2.0 # <20> + threshold: 75 # <21> ``` 1. The `header` section is configurations for source codes license header. @@ -790,6 +791,7 @@ dependency: # <15> 18. The `name` of the dependency, The name is different for different projects, `PackagePath` in Go project, `GroupID:ArtifactID` in maven project, `PackageName` in NodeJS project. 19. The `version` of the dependency, it's locked, preventing license changed between different versions. 20. The [SPDX ID](https://spdx.org/licenses/) of the dependency license. +21. The minimum percentage of the file that must contain license text for identifying a license, default is `75`. **NOTE**: When the `SPDX-ID` is Apache-2.0 and the owner is Apache Software foundation, the content would be [a dedicated license](https://www.apache.org/legal/src-headers.html#headers) specified by the ASF, otherwise, the license would be [the standard one](https://www.apache.org/foundation/license-faq.html#Apply-My-Software). diff --git a/pkg/deps/config.go b/pkg/deps/config.go index 64c0a53..d3a77ed 100644 --- a/pkg/deps/config.go +++ b/pkg/deps/config.go @@ -22,9 +22,15 @@ import ( "path/filepath" ) +// DefaultCoverageThreshold is the minimum percentage of the file +// that must contain license text for identifying a license. +// Reference: https://github.com/golang/pkgsite/blob/d43359e3a135fc391960db4f5800eb081d658412/internal/licenses/licenses.go#L48 +const DefaultCoverageThreshold = 75 + type ConfigDeps struct { - Files []string `yaml:"files"` - License []*ConfigDepLicense `yaml:"licenses"` + Threshold int `yaml:"threshold"` + Files []string `yaml:"files"` + Licenses []*ConfigDepLicense `yaml:"licenses"` } type ConfigDepLicense struct { @@ -46,5 +52,9 @@ func (config *ConfigDeps) Finalize(configFile string) error { config.Files[i] = filepath.Join(filepath.Dir(configFileAbsPath), file) } + if config.Threshold <= 0 { + config.Threshold = DefaultCoverageThreshold + } + return nil } diff --git a/pkg/deps/golang.go b/pkg/deps/golang.go index 0e93b82..7483005 100644 --- a/pkg/deps/golang.go +++ b/pkg/deps/golang.go @@ -45,7 +45,7 @@ func (resolver *GoModResolver) CanResolve(file string) bool { } // Resolve resolves licenses of all dependencies declared in the go.mod file. -func (resolver *GoModResolver) Resolve(goModFile string, licenses []*ConfigDepLicense, report *Report) error { +func (resolver *GoModResolver) Resolve(goModFile string, config *ConfigDeps, report *Report) error { if err := os.Chdir(filepath.Dir(goModFile)); err != nil { return err } @@ -78,14 +78,14 @@ func (resolver *GoModResolver) Resolve(goModFile string, licenses []*ConfigDepLi logger.Log.Debugln("Module size:", len(modules)) - return resolver.ResolvePackages(modules, licenses, report) + return resolver.ResolvePackages(modules, config, report) } // ResolvePackages resolves the licenses of the given packages. -func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, licenses []*ConfigDepLicense, report *Report) error { +func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, config *ConfigDeps, report *Report) error { for _, module := range modules { func() { - for _, l := range licenses { + for _, l := range config.Licenses { if l.Name == module.Path && l.Version == module.Version { report.Resolve(&Result{ Dependency: module.Path, @@ -95,7 +95,7 @@ func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, licen return } } - err := resolver.ResolvePackageLicense(module, report) + err := resolver.ResolvePackageLicense(config, module, report) if err != nil { logger.Log.Warnf("Failed to resolve the license of <%s@%s>: %v\n", module.Path, module.Version, err) report.Skip(&Result{ @@ -111,7 +111,7 @@ func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, licen var possibleLicenseFileName = regexp.MustCompile(`(?i)^LICENSE|LICENCE(\.txt)?|COPYING(\.txt)?$`) -func (resolver *GoModResolver) ResolvePackageLicense(module *packages.Module, report *Report) error { +func (resolver *GoModResolver) ResolvePackageLicense(config *ConfigDeps, module *packages.Module, report *Report) error { dir := module.Dir for { @@ -129,7 +129,7 @@ func (resolver *GoModResolver) ResolvePackageLicense(module *packages.Module, re if err != nil { return err } - identifier, err := license.Identify(string(content)) + identifier, err := license.Identify(string(content), config.Threshold) if err != nil { return err } diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go index 31132dd..3c56188 100644 --- a/pkg/deps/jar.go +++ b/pkg/deps/jar.go @@ -37,9 +37,9 @@ func (resolver *JarResolver) CanResolve(jarFile string) bool { return filepath.Ext(jarFile) == ".jar" } -func (resolver *JarResolver) Resolve(jarFile string, report *Report) error { +func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps, report *Report) error { state := NotFound - if err := resolver.ResolveJar(&state, jarFile, Unknown, report); err != nil { + if err := resolver.ResolveJar(config, &state, jarFile, Unknown, report); err != nil { dep := filepath.Base(jarFile) logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep, state.String()) report.Skip(&Result{ @@ -51,7 +51,7 @@ func (resolver *JarResolver) Resolve(jarFile string, report *Report) error { return nil } -func (resolver *JarResolver) ResolveJar(state *State, jarFile, version string, report *Report) error { +func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFile, version string, report *Report) error { dep := filepath.Base(jarFile) compressedJar, err := zip.OpenReader(jarFile) @@ -76,7 +76,7 @@ func (resolver *JarResolver) ResolveJar(state *State, jarFile, version string, r return err } - return resolver.IdentifyLicense(jarFile, dep, buf.String(), version, report) + return resolver.IdentifyLicense(config, jarFile, dep, buf.String(), version, report) } } @@ -122,8 +122,8 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile *zip.File) (*bytes.Buff return buf, nil } -func (resolver *JarResolver) IdentifyLicense(path, dep, content, version string, report *Report) error { - identifier, err := license.Identify(content) +func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep, content, version string, report *Report) error { + identifier, err := license.Identify(content, config.Threshold) if err != nil { return err } diff --git a/pkg/deps/jar_test.go b/pkg/deps/jar_test.go index 8b18b43..3884a8e 100644 --- a/pkg/deps/jar_test.go +++ b/pkg/deps/jar_test.go @@ -79,6 +79,10 @@ func copyJars(t *testing.T, pomFile, content string) ([]string, error) { } func TestResolveJar(t *testing.T) { + config := &deps.ConfigDeps{ + Threshold: deps.DefaultCoverageThreshold, + } + if _, err := exec.Command("mvn", "--version").Output(); err != nil { logger.Log.Warnf("Failed to find mvn, the test `TestResolveJar` was skipped") return @@ -132,7 +136,7 @@ func TestResolveJar(t *testing.T) { report := deps.Report{} for _, jar := range jars { if resolver.CanResolve(jar) { - if err := resolver.Resolve(jar, &report); err != nil { + if err := resolver.Resolve(jar, config, &report); err != nil { t.Error(err) return } diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go index 3d74d41..23c6324 100644 --- a/pkg/deps/maven.go +++ b/pkg/deps/maven.go @@ -48,7 +48,7 @@ func (resolver *MavenPomResolver) CanResolve(mavenPomFile string) bool { } // Resolve resolves licenses of all dependencies declared in the pom.xml file. -func (resolver *MavenPomResolver) Resolve(mavenPomFile string, licenses []*ConfigDepLicense, report *Report) error { +func (resolver *MavenPomResolver) Resolve(mavenPomFile string, config *ConfigDeps, report *Report) error { if err := os.Chdir(filepath.Dir(mavenPomFile)); err != nil { return err } @@ -70,7 +70,7 @@ func (resolver *MavenPomResolver) Resolve(mavenPomFile string, licenses []*Confi } } - return resolver.ResolveDependencies(deps, licenses, report) + return resolver.ResolveDependencies(deps, config, report) } // CheckMVN check available maven tools, find local repositories and download all dependencies @@ -142,10 +142,10 @@ func (resolver *MavenPomResolver) LoadDependencies() ([]*Dependency, error) { } // ResolveDependencies resolves the licenses of the given dependencies -func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, licenses []*ConfigDepLicense, report *Report) error { +func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, config *ConfigDeps, report *Report) error { for _, dep := range deps { func() { - for _, l := range licenses { + for _, l := range config.Licenses { if l.Name == fmt.Sprintf("%s:%s", dep.GroupID, dep.ArtifactID) && l.Version == dep.Version { report.Resolve(&Result{ Dependency: dep.Jar(), @@ -156,7 +156,7 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, licens } } state := NotFound - err := resolver.ResolveLicense(&state, dep, report) + err := resolver.ResolveLicense(config, &state, dep, report) if err != nil { logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep.Jar(), state.String()) report.Skip(&Result{ @@ -171,17 +171,17 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, licens } // ResolveLicense search all possible locations of the license, such as pom file, jar package -func (resolver *MavenPomResolver) ResolveLicense(state *State, dep *Dependency, report *Report) error { - err := resolver.ResolveJar(state, filepath.Join(resolver.repo, dep.Path(), dep.Jar()), dep.Version, report) +func (resolver *MavenPomResolver) ResolveLicense(config *ConfigDeps, state *State, dep *Dependency, report *Report) error { + err := resolver.ResolveJar(config, state, filepath.Join(resolver.repo, dep.Path(), dep.Jar()), dep.Version, report) if err == nil { return nil } - return resolver.ResolveLicenseFromPom(state, dep, report) + return resolver.ResolveLicenseFromPom(config, state, dep, report) } // ResolveLicenseFromPom search for license in the pom file, which may appear in the header comments or in license element of xml -func (resolver *MavenPomResolver) ResolveLicenseFromPom(state *State, dep *Dependency, report *Report) (err error) { +func (resolver *MavenPomResolver) ResolveLicenseFromPom(config *ConfigDeps, state *State, dep *Dependency, report *Report) (err error) { pomFile := filepath.Join(resolver.repo, dep.Path(), dep.Pom()) pom, err := resolver.ReadLicensesFromPom(pomFile) @@ -204,7 +204,7 @@ func (resolver *MavenPomResolver) ResolveLicenseFromPom(state *State, dep *Depen return err } else if headerComments != "" { *state |= FoundLicenseInPomHeader - return resolver.IdentifyLicense(pomFile, dep.Jar(), headerComments, dep.Version, report) + return resolver.IdentifyLicense(config, pomFile, dep.Jar(), headerComments, dep.Version, report) } return fmt.Errorf("not found in pom file") diff --git a/pkg/deps/npm.go b/pkg/deps/npm.go index 18ba59b..3fc1a0c 100644 --- a/pkg/deps/npm.go +++ b/pkg/deps/npm.go @@ -63,7 +63,7 @@ func (resolver *NpmResolver) CanResolve(file string) bool { } // Resolve resolves licenses of all dependencies declared in the package.json file. -func (resolver *NpmResolver) Resolve(pkgFile string, licenses []*ConfigDepLicense, report *Report) error { +func (resolver *NpmResolver) Resolve(pkgFile string, config *ConfigDeps, report *Report) error { workDir := filepath.Dir(pkgFile) if err := os.Chdir(workDir); err != nil { return err @@ -85,7 +85,7 @@ func (resolver *NpmResolver) Resolve(pkgFile string, licenses []*ConfigDepLicens // Walk through each package's root directory to resolve licenses // Resolve from a package's package.json file or its license file for _, pkg := range pkgs { - if result := resolver.ResolvePackageLicense(pkg.Name, pkg.Path, licenses); result.LicenseSpdxID != "" { + if result := resolver.ResolvePackageLicense(pkg.Name, pkg.Path, config); result.LicenseSpdxID != "" { report.Resolve(result) } else { result.LicenseSpdxID = Unknown @@ -185,17 +185,17 @@ func (resolver *NpmResolver) GetInstalledPkgs(pkgDir string) []*Package { // First, try to find and parse the package's package.json file to check the license file // If the previous step fails, then try to identify the package's LICENSE file // It's a necessary procedure to check the LICENSE file, because the resolver needs to record the license content -func (resolver *NpmResolver) ResolvePackageLicense(pkgName, pkgPath string, licenses []*ConfigDepLicense) *Result { +func (resolver *NpmResolver) ResolvePackageLicense(pkgName, pkgPath string, config *ConfigDeps) *Result { result := &Result{ Dependency: pkgName, } // resolve from the package.json file - if err := resolver.ResolvePkgFile(result, pkgPath, licenses); err != nil { + if err := resolver.ResolvePkgFile(result, pkgPath, config.Licenses); err != nil { result.ResolveErrors = append(result.ResolveErrors, err) } // resolve from the LICENSE file - if err := resolver.ResolveLcsFile(result, pkgPath, licenses); err != nil { + if err := resolver.ResolveLcsFile(result, pkgPath, config); err != nil { result.ResolveErrors = append(result.ResolveErrors, err) } @@ -266,7 +266,7 @@ func (resolver *NpmResolver) ResolveLicensesField(licenses []Lcs) (string, bool) } // ResolveLcsFile tries to find the license file to identify the license -func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, licenses []*ConfigDepLicense) error { +func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, config *ConfigDeps) error { depFiles, err := os.ReadDir(pkgPath) if err != nil { return err @@ -285,13 +285,13 @@ func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, lice if result.LicenseSpdxID != "" { return nil } - for _, l := range licenses { + for _, l := range config.Licenses { if l.Name == info.Name() && l.Version == result.Version { result.LicenseSpdxID = l.License return nil } } - identifier, err := license.Identify(string(content)) + identifier, err := license.Identify(string(content), config.Threshold) if err != nil { return err } diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go index 325b9d0..b48eb90 100644 --- a/pkg/deps/resolve.go +++ b/pkg/deps/resolve.go @@ -23,7 +23,7 @@ import ( type Resolver interface { CanResolve(string) bool - Resolve(string, []*ConfigDepLicense, *Report) error + Resolve(string, *ConfigDeps, *Report) error } var Resolvers = []Resolver{ @@ -39,7 +39,7 @@ resolveFile: if !resolver.CanResolve(file) { continue } - if err := resolver.Resolve(file, config.License, report); err != nil { + if err := resolver.Resolve(file, config, report); err != nil { return err } continue resolveFile diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go index 858bfe7..5c42cf3 100644 --- a/pkg/license/identifier.go +++ b/pkg/license/identifier.go @@ -29,13 +29,7 @@ import ( "github.com/apache/skywalking-eyes/internal/logger" ) -const ( - // coverageThreshold is the minimum percentage of the file that must contain license text. - // Reference: https://github.com/golang/pkgsite/blob/d43359e3a135fc391960db4f5800eb081d658412/internal/licenses/licenses.go#L48 - coverageThreshold = 75 - - licenseTemplatesDir = "lcs-templates" -) +const licenseTemplatesDir = "lcs-templates" var ( _scanner *licensecheck.Scanner @@ -57,9 +51,9 @@ func scanner() *licensecheck.Scanner { // Identify identifies the Spdx ID of the given license content. // If it's a dual-license, it will return `<Licenses 1> and <Licenses 2>`. -func Identify(content string) (string, error) { +func Identify(content string, threshold int) (string, error) { coverage := scanner().Scan([]byte(content)) - if coverage.Percent < coverageThreshold { + if coverage.Percent < float64(threshold) { return "", fmt.Errorf("cannot identify the license, coverage: %.1f%%", coverage.Percent) } diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go index c77bd03..ae5b666 100644 --- a/pkg/license/identifier_test.go +++ b/pkg/license/identifier_test.go @@ -21,7 +21,9 @@ import ( "testing" ) -func TestIdentify(t *testing.T) { +const defaultThreshold = 75 + +func TestIdentifyWithDefaultThreshold(t *testing.T) { tests := []struct { name string content string @@ -324,7 +326,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := Identify(tt.content) + got, err := Identify(tt.content, defaultThreshold) if (err != nil) != tt.wantErr { t.Errorf("Identify() error = %v, wantErr %v", err, tt.wantErr) return
