This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git


The following commit(s) were added to refs/heads/main by this push:
     new db412b1  Make license identifying threshold configurable (#110)
db412b1 is described below

commit db412b1a98b090485f6fa5e45e61e00b04c03ba5
Author: Hoshea Jiang <[email protected]>
AuthorDate: Sat May 21 22:25:09 2022 +0800

    Make license identifying threshold configurable (#110)
---
 README.md                      |  2 ++
 pkg/deps/config.go             | 14 ++++++++++++--
 pkg/deps/golang.go             | 14 +++++++-------
 pkg/deps/jar.go                | 12 ++++++------
 pkg/deps/jar_test.go           |  6 +++++-
 pkg/deps/maven.go              | 20 ++++++++++----------
 pkg/deps/npm.go                | 16 ++++++++--------
 pkg/deps/resolve.go            |  4 ++--
 pkg/license/identifier.go      | 12 +++---------
 pkg/license/identifier_test.go |  6 ++++--
 10 files changed, 59 insertions(+), 47 deletions(-)

diff --git a/README.md b/README.md
index 1a966a7..49dd4cb 100644
--- a/README.md
+++ b/README.md
@@ -768,6 +768,7 @@ dependency: # <15>
     - name: dependency-name # <18>
       version: dependency-version # <19>
       license: Apache-2.0 # <20>
+  threshold: 75 # <21>
 ```
 
 1. The `header` section is configurations for source codes license header.
@@ -790,6 +791,7 @@ dependency: # <15>
 18. The `name` of the dependency, The name is different for different projects, `PackagePath` in Go project, `GroupID:ArtifactID` in maven project, `PackageName` in NodeJS project.
 19. The `version` of the dependency, it's locked, preventing license changed between different versions.
 20. The [SPDX ID](https://spdx.org/licenses/) of the dependency license.
+21. The minimum percentage of the file that must contain license text for identifying a license, default is `75`.
 
 **NOTE**: When the `SPDX-ID` is Apache-2.0 and the owner is Apache Software foundation, the content would be [a dedicated license](https://www.apache.org/legal/src-headers.html#headers) specified by the ASF, otherwise, the license would be [the standard one](https://www.apache.org/foundation/license-faq.html#Apply-My-Software).
 
diff --git a/pkg/deps/config.go b/pkg/deps/config.go
index 64c0a53..d3a77ed 100644
--- a/pkg/deps/config.go
+++ b/pkg/deps/config.go
@@ -22,9 +22,15 @@ import (
        "path/filepath"
 )
 
+// DefaultCoverageThreshold is the minimum percentage of the file
+// that must contain license text for identifying a license.
+// Reference: https://github.com/golang/pkgsite/blob/d43359e3a135fc391960db4f5800eb081d658412/internal/licenses/licenses.go#L48
+const DefaultCoverageThreshold = 75
+
 type ConfigDeps struct {
-       Files   []string            `yaml:"files"`
-       License []*ConfigDepLicense `yaml:"licenses"`
+       Threshold int                 `yaml:"threshold"`
+       Files     []string            `yaml:"files"`
+       Licenses  []*ConfigDepLicense `yaml:"licenses"`
 }
 
 type ConfigDepLicense struct {
@@ -46,5 +52,9 @@ func (config *ConfigDeps) Finalize(configFile string) error {
                config.Files[i] = 
filepath.Join(filepath.Dir(configFileAbsPath), file)
        }
 
+       if config.Threshold <= 0 {
+               config.Threshold = DefaultCoverageThreshold
+       }
+
        return nil
 }
diff --git a/pkg/deps/golang.go b/pkg/deps/golang.go
index 0e93b82..7483005 100644
--- a/pkg/deps/golang.go
+++ b/pkg/deps/golang.go
@@ -45,7 +45,7 @@ func (resolver *GoModResolver) CanResolve(file string) bool {
 }
 
 // Resolve resolves licenses of all dependencies declared in the go.mod file.
-func (resolver *GoModResolver) Resolve(goModFile string, licenses 
[]*ConfigDepLicense, report *Report) error {
+func (resolver *GoModResolver) Resolve(goModFile string, config *ConfigDeps, 
report *Report) error {
        if err := os.Chdir(filepath.Dir(goModFile)); err != nil {
                return err
        }
@@ -78,14 +78,14 @@ func (resolver *GoModResolver) Resolve(goModFile string, 
licenses []*ConfigDepLi
 
        logger.Log.Debugln("Module size:", len(modules))
 
-       return resolver.ResolvePackages(modules, licenses, report)
+       return resolver.ResolvePackages(modules, config, report)
 }
 
 // ResolvePackages resolves the licenses of the given packages.
-func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, 
licenses []*ConfigDepLicense, report *Report) error {
+func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, 
config *ConfigDeps, report *Report) error {
        for _, module := range modules {
                func() {
-                       for _, l := range licenses {
+                       for _, l := range config.Licenses {
                                if l.Name == module.Path && l.Version == 
module.Version {
                                        report.Resolve(&Result{
                                                Dependency:    module.Path,
@@ -95,7 +95,7 @@ func (resolver *GoModResolver) ResolvePackages(modules 
[]*packages.Module, licen
                                        return
                                }
                        }
-                       err := resolver.ResolvePackageLicense(module, report)
+                       err := resolver.ResolvePackageLicense(config, module, 
report)
                        if err != nil {
                                logger.Log.Warnf("Failed to resolve the license 
of <%s@%s>: %v\n", module.Path, module.Version, err)
                                report.Skip(&Result{
@@ -111,7 +111,7 @@ func (resolver *GoModResolver) ResolvePackages(modules 
[]*packages.Module, licen
 
 var possibleLicenseFileName = 
regexp.MustCompile(`(?i)^LICENSE|LICENCE(\.txt)?|COPYING(\.txt)?$`)
 
-func (resolver *GoModResolver) ResolvePackageLicense(module *packages.Module, 
report *Report) error {
+func (resolver *GoModResolver) ResolvePackageLicense(config *ConfigDeps, 
module *packages.Module, report *Report) error {
        dir := module.Dir
 
        for {
@@ -129,7 +129,7 @@ func (resolver *GoModResolver) ResolvePackageLicense(module 
*packages.Module, re
                        if err != nil {
                                return err
                        }
-                       identifier, err := license.Identify(string(content))
+                       identifier, err := license.Identify(string(content), 
config.Threshold)
                        if err != nil {
                                return err
                        }
diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go
index 31132dd..3c56188 100644
--- a/pkg/deps/jar.go
+++ b/pkg/deps/jar.go
@@ -37,9 +37,9 @@ func (resolver *JarResolver) CanResolve(jarFile string) bool {
        return filepath.Ext(jarFile) == ".jar"
 }
 
-func (resolver *JarResolver) Resolve(jarFile string, report *Report) error {
+func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps, 
report *Report) error {
        state := NotFound
-       if err := resolver.ResolveJar(&state, jarFile, Unknown, report); err != 
nil {
+       if err := resolver.ResolveJar(config, &state, jarFile, Unknown, 
report); err != nil {
                dep := filepath.Base(jarFile)
                logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", 
dep, state.String())
                report.Skip(&Result{
@@ -51,7 +51,7 @@ func (resolver *JarResolver) Resolve(jarFile string, report 
*Report) error {
        return nil
 }
 
-func (resolver *JarResolver) ResolveJar(state *State, jarFile, version string, 
report *Report) error {
+func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, 
jarFile, version string, report *Report) error {
        dep := filepath.Base(jarFile)
 
        compressedJar, err := zip.OpenReader(jarFile)
@@ -76,7 +76,7 @@ func (resolver *JarResolver) ResolveJar(state *State, 
jarFile, version string, r
                                return err
                        }
 
-                       return resolver.IdentifyLicense(jarFile, dep, 
buf.String(), version, report)
+                       return resolver.IdentifyLicense(config, jarFile, dep, 
buf.String(), version, report)
                }
        }
 
@@ -122,8 +122,8 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile 
*zip.File) (*bytes.Buff
        return buf, nil
 }
 
-func (resolver *JarResolver) IdentifyLicense(path, dep, content, version 
string, report *Report) error {
-       identifier, err := license.Identify(content)
+func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep, 
content, version string, report *Report) error {
+       identifier, err := license.Identify(content, config.Threshold)
        if err != nil {
                return err
        }
diff --git a/pkg/deps/jar_test.go b/pkg/deps/jar_test.go
index 8b18b43..3884a8e 100644
--- a/pkg/deps/jar_test.go
+++ b/pkg/deps/jar_test.go
@@ -79,6 +79,10 @@ func copyJars(t *testing.T, pomFile, content string) 
([]string, error) {
 }
 
 func TestResolveJar(t *testing.T) {
+       config := &deps.ConfigDeps{
+               Threshold: deps.DefaultCoverageThreshold,
+       }
+
        if _, err := exec.Command("mvn", "--version").Output(); err != nil {
                logger.Log.Warnf("Failed to find mvn, the test `TestResolveJar` 
was skipped")
                return
@@ -132,7 +136,7 @@ func TestResolveJar(t *testing.T) {
                report := deps.Report{}
                for _, jar := range jars {
                        if resolver.CanResolve(jar) {
-                               if err := resolver.Resolve(jar, &report); err 
!= nil {
+                               if err := resolver.Resolve(jar, config, 
&report); err != nil {
                                        t.Error(err)
                                        return
                                }
diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go
index 3d74d41..23c6324 100644
--- a/pkg/deps/maven.go
+++ b/pkg/deps/maven.go
@@ -48,7 +48,7 @@ func (resolver *MavenPomResolver) CanResolve(mavenPomFile 
string) bool {
 }
 
 // Resolve resolves licenses of all dependencies declared in the pom.xml file.
-func (resolver *MavenPomResolver) Resolve(mavenPomFile string, licenses 
[]*ConfigDepLicense, report *Report) error {
+func (resolver *MavenPomResolver) Resolve(mavenPomFile string, config 
*ConfigDeps, report *Report) error {
        if err := os.Chdir(filepath.Dir(mavenPomFile)); err != nil {
                return err
        }
@@ -70,7 +70,7 @@ func (resolver *MavenPomResolver) Resolve(mavenPomFile 
string, licenses []*Confi
                }
        }
 
-       return resolver.ResolveDependencies(deps, licenses, report)
+       return resolver.ResolveDependencies(deps, config, report)
 }
 
 // CheckMVN check available maven tools, find local repositories and download all dependencies
@@ -142,10 +142,10 @@ func (resolver *MavenPomResolver) LoadDependencies() 
([]*Dependency, error) {
 }
 
 // ResolveDependencies resolves the licenses of the given dependencies
-func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, 
licenses []*ConfigDepLicense, report *Report) error {
+func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, 
config *ConfigDeps, report *Report) error {
        for _, dep := range deps {
                func() {
-                       for _, l := range licenses {
+                       for _, l := range config.Licenses {
                                if l.Name == fmt.Sprintf("%s:%s", dep.GroupID, 
dep.ArtifactID) && l.Version == dep.Version {
                                        report.Resolve(&Result{
                                                Dependency:    dep.Jar(),
@@ -156,7 +156,7 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps 
[]*Dependency, licens
                                }
                        }
                        state := NotFound
-                       err := resolver.ResolveLicense(&state, dep, report)
+                       err := resolver.ResolveLicense(config, &state, dep, 
report)
                        if err != nil {
                                logger.Log.Warnf("Failed to resolve the license 
of <%s>: %v\n", dep.Jar(), state.String())
                                report.Skip(&Result{
@@ -171,17 +171,17 @@ func (resolver *MavenPomResolver) 
ResolveDependencies(deps []*Dependency, licens
 }
 
 // ResolveLicense search all possible locations of the license, such as pom file, jar package
-func (resolver *MavenPomResolver) ResolveLicense(state *State, dep 
*Dependency, report *Report) error {
-       err := resolver.ResolveJar(state, filepath.Join(resolver.repo, 
dep.Path(), dep.Jar()), dep.Version, report)
+func (resolver *MavenPomResolver) ResolveLicense(config *ConfigDeps, state 
*State, dep *Dependency, report *Report) error {
+       err := resolver.ResolveJar(config, state, filepath.Join(resolver.repo, 
dep.Path(), dep.Jar()), dep.Version, report)
        if err == nil {
                return nil
        }
 
-       return resolver.ResolveLicenseFromPom(state, dep, report)
+       return resolver.ResolveLicenseFromPom(config, state, dep, report)
 }
 
 // ResolveLicenseFromPom search for license in the pom file, which may appear in the header comments or in license element of xml
-func (resolver *MavenPomResolver) ResolveLicenseFromPom(state *State, dep 
*Dependency, report *Report) (err error) {
+func (resolver *MavenPomResolver) ResolveLicenseFromPom(config *ConfigDeps, 
state *State, dep *Dependency, report *Report) (err error) {
        pomFile := filepath.Join(resolver.repo, dep.Path(), dep.Pom())
 
        pom, err := resolver.ReadLicensesFromPom(pomFile)
@@ -204,7 +204,7 @@ func (resolver *MavenPomResolver) 
ResolveLicenseFromPom(state *State, dep *Depen
                return err
        } else if headerComments != "" {
                *state |= FoundLicenseInPomHeader
-               return resolver.IdentifyLicense(pomFile, dep.Jar(), 
headerComments, dep.Version, report)
+               return resolver.IdentifyLicense(config, pomFile, dep.Jar(), 
headerComments, dep.Version, report)
        }
 
        return fmt.Errorf("not found in pom file")
diff --git a/pkg/deps/npm.go b/pkg/deps/npm.go
index 18ba59b..3fc1a0c 100644
--- a/pkg/deps/npm.go
+++ b/pkg/deps/npm.go
@@ -63,7 +63,7 @@ func (resolver *NpmResolver) CanResolve(file string) bool {
 }
 
 // Resolve resolves licenses of all dependencies declared in the package.json file.
-func (resolver *NpmResolver) Resolve(pkgFile string, licenses 
[]*ConfigDepLicense, report *Report) error {
+func (resolver *NpmResolver) Resolve(pkgFile string, config *ConfigDeps, 
report *Report) error {
        workDir := filepath.Dir(pkgFile)
        if err := os.Chdir(workDir); err != nil {
                return err
@@ -85,7 +85,7 @@ func (resolver *NpmResolver) Resolve(pkgFile string, licenses 
[]*ConfigDepLicens
        // Walk through each package's root directory to resolve licenses
        // Resolve from a package's package.json file or its license file
        for _, pkg := range pkgs {
-               if result := resolver.ResolvePackageLicense(pkg.Name, pkg.Path, 
licenses); result.LicenseSpdxID != "" {
+               if result := resolver.ResolvePackageLicense(pkg.Name, pkg.Path, 
config); result.LicenseSpdxID != "" {
                        report.Resolve(result)
                } else {
                        result.LicenseSpdxID = Unknown
@@ -185,17 +185,17 @@ func (resolver *NpmResolver) GetInstalledPkgs(pkgDir 
string) []*Package {
 // First, try to find and parse the package's package.json file to check the license file
 // If the previous step fails, then try to identify the package's LICENSE file
 // It's a necessary procedure to check the LICENSE file, because the resolver needs to record the license content
-func (resolver *NpmResolver) ResolvePackageLicense(pkgName, pkgPath string, 
licenses []*ConfigDepLicense) *Result {
+func (resolver *NpmResolver) ResolvePackageLicense(pkgName, pkgPath string, 
config *ConfigDeps) *Result {
        result := &Result{
                Dependency: pkgName,
        }
        // resolve from the package.json file
-       if err := resolver.ResolvePkgFile(result, pkgPath, licenses); err != 
nil {
+       if err := resolver.ResolvePkgFile(result, pkgPath, config.Licenses); 
err != nil {
                result.ResolveErrors = append(result.ResolveErrors, err)
        }
 
        // resolve from the LICENSE file
-       if err := resolver.ResolveLcsFile(result, pkgPath, licenses); err != 
nil {
+       if err := resolver.ResolveLcsFile(result, pkgPath, config); err != nil {
                result.ResolveErrors = append(result.ResolveErrors, err)
        }
 
@@ -266,7 +266,7 @@ func (resolver *NpmResolver) ResolveLicensesField(licenses 
[]Lcs) (string, bool)
 }
 
 // ResolveLcsFile tries to find the license file to identify the license
-func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, 
licenses []*ConfigDepLicense) error {
+func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, 
config *ConfigDeps) error {
        depFiles, err := os.ReadDir(pkgPath)
        if err != nil {
                return err
@@ -285,13 +285,13 @@ func (resolver *NpmResolver) ResolveLcsFile(result 
*Result, pkgPath string, lice
                if result.LicenseSpdxID != "" {
                        return nil
                }
-               for _, l := range licenses {
+               for _, l := range config.Licenses {
                        if l.Name == info.Name() && l.Version == result.Version 
{
                                result.LicenseSpdxID = l.License
                                return nil
                        }
                }
-               identifier, err := license.Identify(string(content))
+               identifier, err := license.Identify(string(content), 
config.Threshold)
                if err != nil {
                        return err
                }
diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go
index 325b9d0..b48eb90 100644
--- a/pkg/deps/resolve.go
+++ b/pkg/deps/resolve.go
@@ -23,7 +23,7 @@ import (
 
 type Resolver interface {
        CanResolve(string) bool
-       Resolve(string, []*ConfigDepLicense, *Report) error
+       Resolve(string, *ConfigDeps, *Report) error
 }
 
 var Resolvers = []Resolver{
@@ -39,7 +39,7 @@ resolveFile:
                        if !resolver.CanResolve(file) {
                                continue
                        }
-                       if err := resolver.Resolve(file, config.License, 
report); err != nil {
+                       if err := resolver.Resolve(file, config, report); err 
!= nil {
                                return err
                        }
                        continue resolveFile
diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go
index 858bfe7..5c42cf3 100644
--- a/pkg/license/identifier.go
+++ b/pkg/license/identifier.go
@@ -29,13 +29,7 @@ import (
        "github.com/apache/skywalking-eyes/internal/logger"
 )
 
-const (
-       // coverageThreshold is the minimum percentage of the file that must contain license text.
-       // Reference: https://github.com/golang/pkgsite/blob/d43359e3a135fc391960db4f5800eb081d658412/internal/licenses/licenses.go#L48
-       coverageThreshold = 75
-
-       licenseTemplatesDir = "lcs-templates"
-)
+const licenseTemplatesDir = "lcs-templates"
 
 var (
        _scanner    *licensecheck.Scanner
@@ -57,9 +51,9 @@ func scanner() *licensecheck.Scanner {
 
 // Identify identifies the Spdx ID of the given license content.
 // If it's a dual-license, it will return `<Licenses 1> and <Licenses 2>`.
-func Identify(content string) (string, error) {
+func Identify(content string, threshold int) (string, error) {
        coverage := scanner().Scan([]byte(content))
-       if coverage.Percent < coverageThreshold {
+       if coverage.Percent < float64(threshold) {
                return "", fmt.Errorf("cannot identify the license, coverage: 
%.1f%%", coverage.Percent)
        }
 
diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go
index c77bd03..ae5b666 100644
--- a/pkg/license/identifier_test.go
+++ b/pkg/license/identifier_test.go
@@ -21,7 +21,9 @@ import (
        "testing"
 )
 
-func TestIdentify(t *testing.T) {
+const defaultThreshold = 75
+
+func TestIdentifyWithDefaultThreshold(t *testing.T) {
        tests := []struct {
                name    string
                content string
@@ -324,7 +326,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
SUCH DAMAGE.
        }
        for _, tt := range tests {
                t.Run(tt.name, func(t *testing.T) {
-                       got, err := Identify(tt.content)
+                       got, err := Identify(tt.content, defaultThreshold)
                        if (err != nil) != tt.wantErr {
                                t.Errorf("Identify() error = %v, wantErr %v", 
err, tt.wantErr)
                                return

Reply via email to