This is an automated email from the ASF dual-hosted git repository.
kezhenxu94 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git
The following commit(s) were added to refs/heads/main by this push:
new db412b1 Make license identifying threshold configurable (#110)
db412b1 is described below
commit db412b1a98b090485f6fa5e45e61e00b04c03ba5
Author: Hoshea Jiang <[email protected]>
AuthorDate: Sat May 21 22:25:09 2022 +0800
Make license identifying threshold configurable (#110)
---
README.md | 2 ++
pkg/deps/config.go | 14 ++++++++++++--
pkg/deps/golang.go | 14 +++++++-------
pkg/deps/jar.go | 12 ++++++------
pkg/deps/jar_test.go | 6 +++++-
pkg/deps/maven.go | 20 ++++++++++----------
pkg/deps/npm.go | 16 ++++++++--------
pkg/deps/resolve.go | 4 ++--
pkg/license/identifier.go | 12 +++---------
pkg/license/identifier_test.go | 6 ++++--
10 files changed, 59 insertions(+), 47 deletions(-)
diff --git a/README.md b/README.md
index 1a966a7..49dd4cb 100644
--- a/README.md
+++ b/README.md
@@ -768,6 +768,7 @@ dependency: # <15>
- name: dependency-name # <18>
version: dependency-version # <19>
license: Apache-2.0 # <20>
+ threshold: 75 # <21>
```
1. The `header` section is configurations for source codes license header.
@@ -790,6 +791,7 @@ dependency: # <15>
18. The `name` of the dependency. The name differs between project types:
`PackagePath` in Go projects, `GroupID:ArtifactID` in Maven projects, and
`PackageName` in NodeJS projects.
19. The `version` of the dependency. It is locked to guard against the license
changing between different versions.
20. The [SPDX ID](https://spdx.org/licenses/) of the dependency license.
+21. The minimum percentage of the file that must contain license text for
a license to be identified; the default is `75`.
**NOTE**: When the `SPDX-ID` is Apache-2.0 and the owner is the Apache Software
Foundation, the content would be [a dedicated
license](https://www.apache.org/legal/src-headers.html#headers) specified by
the ASF; otherwise, the license would be [the standard
one](https://www.apache.org/foundation/license-faq.html#Apply-My-Software).
diff --git a/pkg/deps/config.go b/pkg/deps/config.go
index 64c0a53..d3a77ed 100644
--- a/pkg/deps/config.go
+++ b/pkg/deps/config.go
@@ -22,9 +22,15 @@ import (
"path/filepath"
)
+// DefaultCoverageThreshold is the minimum percentage of the file
+// that must contain license text for identifying a license.
+// Reference:
https://github.com/golang/pkgsite/blob/d43359e3a135fc391960db4f5800eb081d658412/internal/licenses/licenses.go#L48
+const DefaultCoverageThreshold = 75
+
type ConfigDeps struct {
- Files []string `yaml:"files"`
- License []*ConfigDepLicense `yaml:"licenses"`
+ Threshold int `yaml:"threshold"`
+ Files []string `yaml:"files"`
+ Licenses []*ConfigDepLicense `yaml:"licenses"`
}
type ConfigDepLicense struct {
@@ -46,5 +52,9 @@ func (config *ConfigDeps) Finalize(configFile string) error {
config.Files[i] =
filepath.Join(filepath.Dir(configFileAbsPath), file)
}
+ if config.Threshold <= 0 {
+ config.Threshold = DefaultCoverageThreshold
+ }
+
return nil
}
diff --git a/pkg/deps/golang.go b/pkg/deps/golang.go
index 0e93b82..7483005 100644
--- a/pkg/deps/golang.go
+++ b/pkg/deps/golang.go
@@ -45,7 +45,7 @@ func (resolver *GoModResolver) CanResolve(file string) bool {
}
// Resolve resolves licenses of all dependencies declared in the go.mod file.
-func (resolver *GoModResolver) Resolve(goModFile string, licenses
[]*ConfigDepLicense, report *Report) error {
+func (resolver *GoModResolver) Resolve(goModFile string, config *ConfigDeps,
report *Report) error {
if err := os.Chdir(filepath.Dir(goModFile)); err != nil {
return err
}
@@ -78,14 +78,14 @@ func (resolver *GoModResolver) Resolve(goModFile string,
licenses []*ConfigDepLi
logger.Log.Debugln("Module size:", len(modules))
- return resolver.ResolvePackages(modules, licenses, report)
+ return resolver.ResolvePackages(modules, config, report)
}
// ResolvePackages resolves the licenses of the given packages.
-func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module,
licenses []*ConfigDepLicense, report *Report) error {
+func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module,
config *ConfigDeps, report *Report) error {
for _, module := range modules {
func() {
- for _, l := range licenses {
+ for _, l := range config.Licenses {
if l.Name == module.Path && l.Version ==
module.Version {
report.Resolve(&Result{
Dependency: module.Path,
@@ -95,7 +95,7 @@ func (resolver *GoModResolver) ResolvePackages(modules
[]*packages.Module, licen
return
}
}
- err := resolver.ResolvePackageLicense(module, report)
+ err := resolver.ResolvePackageLicense(config, module,
report)
if err != nil {
logger.Log.Warnf("Failed to resolve the license
of <%s@%s>: %v\n", module.Path, module.Version, err)
report.Skip(&Result{
@@ -111,7 +111,7 @@ func (resolver *GoModResolver) ResolvePackages(modules
[]*packages.Module, licen
var possibleLicenseFileName =
regexp.MustCompile(`(?i)^LICENSE|LICENCE(\.txt)?|COPYING(\.txt)?$`)
-func (resolver *GoModResolver) ResolvePackageLicense(module *packages.Module,
report *Report) error {
+func (resolver *GoModResolver) ResolvePackageLicense(config *ConfigDeps,
module *packages.Module, report *Report) error {
dir := module.Dir
for {
@@ -129,7 +129,7 @@ func (resolver *GoModResolver) ResolvePackageLicense(module
*packages.Module, re
if err != nil {
return err
}
- identifier, err := license.Identify(string(content))
+ identifier, err := license.Identify(string(content),
config.Threshold)
if err != nil {
return err
}
diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go
index 31132dd..3c56188 100644
--- a/pkg/deps/jar.go
+++ b/pkg/deps/jar.go
@@ -37,9 +37,9 @@ func (resolver *JarResolver) CanResolve(jarFile string) bool {
return filepath.Ext(jarFile) == ".jar"
}
-func (resolver *JarResolver) Resolve(jarFile string, report *Report) error {
+func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps,
report *Report) error {
state := NotFound
- if err := resolver.ResolveJar(&state, jarFile, Unknown, report); err !=
nil {
+ if err := resolver.ResolveJar(config, &state, jarFile, Unknown,
report); err != nil {
dep := filepath.Base(jarFile)
logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n",
dep, state.String())
report.Skip(&Result{
@@ -51,7 +51,7 @@ func (resolver *JarResolver) Resolve(jarFile string, report
*Report) error {
return nil
}
-func (resolver *JarResolver) ResolveJar(state *State, jarFile, version string,
report *Report) error {
+func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State,
jarFile, version string, report *Report) error {
dep := filepath.Base(jarFile)
compressedJar, err := zip.OpenReader(jarFile)
@@ -76,7 +76,7 @@ func (resolver *JarResolver) ResolveJar(state *State,
jarFile, version string, r
return err
}
- return resolver.IdentifyLicense(jarFile, dep,
buf.String(), version, report)
+ return resolver.IdentifyLicense(config, jarFile, dep,
buf.String(), version, report)
}
}
@@ -122,8 +122,8 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile
*zip.File) (*bytes.Buff
return buf, nil
}
-func (resolver *JarResolver) IdentifyLicense(path, dep, content, version
string, report *Report) error {
- identifier, err := license.Identify(content)
+func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep,
content, version string, report *Report) error {
+ identifier, err := license.Identify(content, config.Threshold)
if err != nil {
return err
}
diff --git a/pkg/deps/jar_test.go b/pkg/deps/jar_test.go
index 8b18b43..3884a8e 100644
--- a/pkg/deps/jar_test.go
+++ b/pkg/deps/jar_test.go
@@ -79,6 +79,10 @@ func copyJars(t *testing.T, pomFile, content string)
([]string, error) {
}
func TestResolveJar(t *testing.T) {
+ config := &deps.ConfigDeps{
+ Threshold: deps.DefaultCoverageThreshold,
+ }
+
if _, err := exec.Command("mvn", "--version").Output(); err != nil {
logger.Log.Warnf("Failed to find mvn, the test `TestResolveJar`
was skipped")
return
@@ -132,7 +136,7 @@ func TestResolveJar(t *testing.T) {
report := deps.Report{}
for _, jar := range jars {
if resolver.CanResolve(jar) {
- if err := resolver.Resolve(jar, &report); err
!= nil {
+ if err := resolver.Resolve(jar, config,
&report); err != nil {
t.Error(err)
return
}
diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go
index 3d74d41..23c6324 100644
--- a/pkg/deps/maven.go
+++ b/pkg/deps/maven.go
@@ -48,7 +48,7 @@ func (resolver *MavenPomResolver) CanResolve(mavenPomFile
string) bool {
}
// Resolve resolves licenses of all dependencies declared in the pom.xml file.
-func (resolver *MavenPomResolver) Resolve(mavenPomFile string, licenses
[]*ConfigDepLicense, report *Report) error {
+func (resolver *MavenPomResolver) Resolve(mavenPomFile string, config
*ConfigDeps, report *Report) error {
if err := os.Chdir(filepath.Dir(mavenPomFile)); err != nil {
return err
}
@@ -70,7 +70,7 @@ func (resolver *MavenPomResolver) Resolve(mavenPomFile
string, licenses []*Confi
}
}
- return resolver.ResolveDependencies(deps, licenses, report)
+ return resolver.ResolveDependencies(deps, config, report)
}
// CheckMVN check available maven tools, find local repositories and download
all dependencies
@@ -142,10 +142,10 @@ func (resolver *MavenPomResolver) LoadDependencies()
([]*Dependency, error) {
}
// ResolveDependencies resolves the licenses of the given dependencies
-func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency,
licenses []*ConfigDepLicense, report *Report) error {
+func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency,
config *ConfigDeps, report *Report) error {
for _, dep := range deps {
func() {
- for _, l := range licenses {
+ for _, l := range config.Licenses {
if l.Name == fmt.Sprintf("%s:%s", dep.GroupID,
dep.ArtifactID) && l.Version == dep.Version {
report.Resolve(&Result{
Dependency: dep.Jar(),
@@ -156,7 +156,7 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps
[]*Dependency, licens
}
}
state := NotFound
- err := resolver.ResolveLicense(&state, dep, report)
+ err := resolver.ResolveLicense(config, &state, dep,
report)
if err != nil {
logger.Log.Warnf("Failed to resolve the license
of <%s>: %v\n", dep.Jar(), state.String())
report.Skip(&Result{
@@ -171,17 +171,17 @@ func (resolver *MavenPomResolver)
ResolveDependencies(deps []*Dependency, licens
}
// ResolveLicense search all possible locations of the license, such as pom
file, jar package
-func (resolver *MavenPomResolver) ResolveLicense(state *State, dep
*Dependency, report *Report) error {
- err := resolver.ResolveJar(state, filepath.Join(resolver.repo,
dep.Path(), dep.Jar()), dep.Version, report)
+func (resolver *MavenPomResolver) ResolveLicense(config *ConfigDeps, state
*State, dep *Dependency, report *Report) error {
+ err := resolver.ResolveJar(config, state, filepath.Join(resolver.repo,
dep.Path(), dep.Jar()), dep.Version, report)
if err == nil {
return nil
}
- return resolver.ResolveLicenseFromPom(state, dep, report)
+ return resolver.ResolveLicenseFromPom(config, state, dep, report)
}
// ResolveLicenseFromPom search for license in the pom file, which may appear
in the header comments or in license element of xml
-func (resolver *MavenPomResolver) ResolveLicenseFromPom(state *State, dep
*Dependency, report *Report) (err error) {
+func (resolver *MavenPomResolver) ResolveLicenseFromPom(config *ConfigDeps,
state *State, dep *Dependency, report *Report) (err error) {
pomFile := filepath.Join(resolver.repo, dep.Path(), dep.Pom())
pom, err := resolver.ReadLicensesFromPom(pomFile)
@@ -204,7 +204,7 @@ func (resolver *MavenPomResolver)
ResolveLicenseFromPom(state *State, dep *Depen
return err
} else if headerComments != "" {
*state |= FoundLicenseInPomHeader
- return resolver.IdentifyLicense(pomFile, dep.Jar(),
headerComments, dep.Version, report)
+ return resolver.IdentifyLicense(config, pomFile, dep.Jar(),
headerComments, dep.Version, report)
}
return fmt.Errorf("not found in pom file")
diff --git a/pkg/deps/npm.go b/pkg/deps/npm.go
index 18ba59b..3fc1a0c 100644
--- a/pkg/deps/npm.go
+++ b/pkg/deps/npm.go
@@ -63,7 +63,7 @@ func (resolver *NpmResolver) CanResolve(file string) bool {
}
// Resolve resolves licenses of all dependencies declared in the package.json
file.
-func (resolver *NpmResolver) Resolve(pkgFile string, licenses
[]*ConfigDepLicense, report *Report) error {
+func (resolver *NpmResolver) Resolve(pkgFile string, config *ConfigDeps,
report *Report) error {
workDir := filepath.Dir(pkgFile)
if err := os.Chdir(workDir); err != nil {
return err
@@ -85,7 +85,7 @@ func (resolver *NpmResolver) Resolve(pkgFile string, licenses
[]*ConfigDepLicens
// Walk through each package's root directory to resolve licenses
// Resolve from a package's package.json file or its license file
for _, pkg := range pkgs {
- if result := resolver.ResolvePackageLicense(pkg.Name, pkg.Path,
licenses); result.LicenseSpdxID != "" {
+ if result := resolver.ResolvePackageLicense(pkg.Name, pkg.Path,
config); result.LicenseSpdxID != "" {
report.Resolve(result)
} else {
result.LicenseSpdxID = Unknown
@@ -185,17 +185,17 @@ func (resolver *NpmResolver) GetInstalledPkgs(pkgDir
string) []*Package {
// First, try to find and parse the package's package.json file to check the
license file
// If the previous step fails, then try to identify the package's LICENSE file
// It's a necessary procedure to check the LICENSE file, because the resolver
needs to record the license content
-func (resolver *NpmResolver) ResolvePackageLicense(pkgName, pkgPath string,
licenses []*ConfigDepLicense) *Result {
+func (resolver *NpmResolver) ResolvePackageLicense(pkgName, pkgPath string,
config *ConfigDeps) *Result {
result := &Result{
Dependency: pkgName,
}
// resolve from the package.json file
- if err := resolver.ResolvePkgFile(result, pkgPath, licenses); err !=
nil {
+ if err := resolver.ResolvePkgFile(result, pkgPath, config.Licenses);
err != nil {
result.ResolveErrors = append(result.ResolveErrors, err)
}
// resolve from the LICENSE file
- if err := resolver.ResolveLcsFile(result, pkgPath, licenses); err !=
nil {
+ if err := resolver.ResolveLcsFile(result, pkgPath, config); err != nil {
result.ResolveErrors = append(result.ResolveErrors, err)
}
@@ -266,7 +266,7 @@ func (resolver *NpmResolver) ResolveLicensesField(licenses
[]Lcs) (string, bool)
}
// ResolveLcsFile tries to find the license file to identify the license
-func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string,
licenses []*ConfigDepLicense) error {
+func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string,
config *ConfigDeps) error {
depFiles, err := os.ReadDir(pkgPath)
if err != nil {
return err
@@ -285,13 +285,13 @@ func (resolver *NpmResolver) ResolveLcsFile(result
*Result, pkgPath string, lice
if result.LicenseSpdxID != "" {
return nil
}
- for _, l := range licenses {
+ for _, l := range config.Licenses {
if l.Name == info.Name() && l.Version == result.Version
{
result.LicenseSpdxID = l.License
return nil
}
}
- identifier, err := license.Identify(string(content))
+ identifier, err := license.Identify(string(content),
config.Threshold)
if err != nil {
return err
}
diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go
index 325b9d0..b48eb90 100644
--- a/pkg/deps/resolve.go
+++ b/pkg/deps/resolve.go
@@ -23,7 +23,7 @@ import (
type Resolver interface {
CanResolve(string) bool
- Resolve(string, []*ConfigDepLicense, *Report) error
+ Resolve(string, *ConfigDeps, *Report) error
}
var Resolvers = []Resolver{
@@ -39,7 +39,7 @@ resolveFile:
if !resolver.CanResolve(file) {
continue
}
- if err := resolver.Resolve(file, config.License,
report); err != nil {
+ if err := resolver.Resolve(file, config, report); err
!= nil {
return err
}
continue resolveFile
diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go
index 858bfe7..5c42cf3 100644
--- a/pkg/license/identifier.go
+++ b/pkg/license/identifier.go
@@ -29,13 +29,7 @@ import (
"github.com/apache/skywalking-eyes/internal/logger"
)
-const (
- // coverageThreshold is the minimum percentage of the file that must
contain license text.
- // Reference:
https://github.com/golang/pkgsite/blob/d43359e3a135fc391960db4f5800eb081d658412/internal/licenses/licenses.go#L48
- coverageThreshold = 75
-
- licenseTemplatesDir = "lcs-templates"
-)
+const licenseTemplatesDir = "lcs-templates"
var (
_scanner *licensecheck.Scanner
@@ -57,9 +51,9 @@ func scanner() *licensecheck.Scanner {
// Identify identifies the Spdx ID of the given license content.
// If it's a dual-license, it will return `<Licenses 1> and <Licenses 2>`.
-func Identify(content string) (string, error) {
+func Identify(content string, threshold int) (string, error) {
coverage := scanner().Scan([]byte(content))
- if coverage.Percent < coverageThreshold {
+ if coverage.Percent < float64(threshold) {
return "", fmt.Errorf("cannot identify the license, coverage:
%.1f%%", coverage.Percent)
}
diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go
index c77bd03..ae5b666 100644
--- a/pkg/license/identifier_test.go
+++ b/pkg/license/identifier_test.go
@@ -21,7 +21,9 @@ import (
"testing"
)
-func TestIdentify(t *testing.T) {
+const defaultThreshold = 75
+
+func TestIdentifyWithDefaultThreshold(t *testing.T) {
tests := []struct {
name string
content string
@@ -324,7 +326,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- got, err := Identify(tt.content)
+ got, err := Identify(tt.content, defaultThreshold)
if (err != nil) != tt.wantErr {
t.Errorf("Identify() error = %v, wantErr %v",
err, tt.wantErr)
return