This is an automated email from the ASF dual-hosted git repository. kezhenxu94 pushed a commit to branch urls in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git
commit afa773c2b00e2a3bee450c5798508920e9df6683 Author: kezhenxu94 <[email protected]> AuthorDate: Sun May 22 16:29:38 2022 +0800 Add custom license urls for identification --- README.md | 2 +- assets/urls.yaml | 18 ++++++++++++ go.mod | 2 ++ go.sum | 4 +-- pkg/deps/golang.go | 17 +++++++----- pkg/deps/jar.go | 58 +++++++++++++++++++++++++-------------- pkg/deps/maven.go | 62 ++++++++++++++++++++++++++---------------- pkg/deps/npm.go | 16 +++++++---- pkg/deps/resolve.go | 1 + pkg/license/identifier.go | 26 +++++++++++++++++- pkg/license/identifier_test.go | 30 ++++++++++++++++++++ 11 files changed, 174 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 49dd4cb..ef366de 100644 --- a/README.md +++ b/README.md @@ -788,7 +788,7 @@ dependency: # <15> 15. The `dependency` section is configurations for resolving dependencies' licenses. 16. The `files` are the files that declare the dependencies of a project, typically, `go.mod` in Go project, `pom.xml` in maven project, and `package.json` in NodeJS project. If it's a relative path, it's relative to the `.licenserc.yaml`. 17. Declare the licenses which cannot be identified by this tool. -18. The `name` of the dependency, The name is different for different projects, `PackagePath` in Go project, `GroupID:ArtifactID` in maven project, `PackageName` in NodeJS project. +18. The `name` of the dependency, The name is different for different projects, `PackagePath` in Go project, `.jar` file in maven project, `PackageName` in NodeJS project. 19. The `version` of the dependency, it's locked, preventing license changed between different versions. 20. The [SPDX ID](https://spdx.org/licenses/) of the dependency license. 21. The minimum percentage of the file that must contain license text for identifying a license, default is `75`. 
diff --git a/assets/urls.yaml b/assets/urls.yaml new file mode 100644 index 0000000..8af29b0 --- /dev/null +++ b/assets/urls.yaml @@ -0,0 +1,18 @@ +Apache-2.0: + - apache.org/licenses/license-2.0 + - apache.org/licenses/license-2.0.txt + - apache.org/license/license-2.0 + - apache.org/license/license-2.0.txt + +MIT: + - bouncycastle.org/licence + - opensource.org/licenses/mit-license.php + +LGPL-2.1: + - gnu.org/licenses/old-licenses/lgpl-2.1 + +GPL-2.0-with-classpath-exception: + - gnu.org/software/classpath/license + +EPL-1.0: + - eclipse.org/legal/epl-v10 diff --git a/go.mod b/go.mod index 15ff201..fb294bb 100644 --- a/go.mod +++ b/go.mod @@ -21,3 +21,5 @@ require ( google.golang.org/protobuf v1.28.0 // indirect gopkg.in/yaml.v3 v3.0.0-20220512140231-539c8e751b99 ) + +replace github.com/google/licensecheck v0.3.1 => github.com/kezhenxu94/licensecheck v0.3.2-0.20220522082511-5121cbc45274 diff --git a/go.sum b/go.sum index ae2f5ee..09dd6b4 100644 --- a/go.sum +++ b/go.sum @@ -96,8 +96,6 @@ github.com/google/go-github/v33 v33.0.0/go.mod h1:GMdDnVZY/2TsWgp/lkYnpSAh6TrzhA github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= -github.com/google/licensecheck v0.3.1 h1:QoxgoDkaeC4nFrtGN1jV7IPmDCHFNIVh54e5hSt6sPs= -github.com/google/licensecheck v0.3.1/go.mod h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY= github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= @@ -117,6 +115,8 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap 
v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/kezhenxu94/licensecheck v0.3.2-0.20220522082511-5121cbc45274 h1:70F5y4oVIT9OoQ61ujeWgycYonXqW6kD6Z0LLi3V73c= +github.com/kezhenxu94/licensecheck v0.3.2-0.20220522082511-5121cbc45274/go.mod h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= diff --git a/pkg/deps/golang.go b/pkg/deps/golang.go index 7483005..842be06 100644 --- a/pkg/deps/golang.go +++ b/pkg/deps/golang.go @@ -27,6 +27,7 @@ import ( "os/exec" "path/filepath" "regexp" + "strings" "github.com/apache/skywalking-eyes/internal/logger" "github.com/apache/skywalking-eyes/pkg/license" @@ -86,13 +87,15 @@ func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, confi for _, module := range modules { func() { for _, l := range config.Licenses { - if l.Name == module.Path && l.Version == module.Version { - report.Resolve(&Result{ - Dependency: module.Path, - LicenseSpdxID: l.License, - Version: module.Version, - }) - return + for _, version := range strings.Split(l.Version, ",") { + if l.Name == module.Path && version == module.Version { + report.Resolve(&Result{ + Dependency: module.Path, + LicenseSpdxID: l.License, + Version: module.Version, + }) + return + } } } err := resolver.ResolvePackageLicense(config, module, report) diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go index 3c56188..1e0c774 100644 --- a/pkg/deps/jar.go +++ b/pkg/deps/jar.go @@ -29,31 +29,39 @@ import ( "github.com/apache/skywalking-eyes/internal/logger" 
"github.com/apache/skywalking-eyes/pkg/license" + "github.com/bmatcuk/doublestar/v2" ) type JarResolver struct{} -func (resolver *JarResolver) CanResolve(jarFile string) bool { - return filepath.Ext(jarFile) == ".jar" +func (resolver *JarResolver) CanResolve(jarFiles string) bool { + return strings.HasSuffix(jarFiles, ".jar") } -func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps, report *Report) error { - state := NotFound - if err := resolver.ResolveJar(config, &state, jarFile, Unknown, report); err != nil { - dep := filepath.Base(jarFile) - logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep, state.String()) - report.Skip(&Result{ - Dependency: dep, - LicenseSpdxID: Unknown, - }) +func (resolver *JarResolver) Resolve(jarFiles string, config *ConfigDeps, report *Report) error { + fs, err := doublestar.Glob(jarFiles) + if err != nil { + return err + } + for _, jarFile := range fs { + state := NotFound + if err := resolver.ResolveJar(config, &state, jarFile, Unknown, report); err != nil { + dep := filepath.Base(jarFile) + logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep, state.String()) + report.Skip(&Result{ + Dependency: dep, + LicenseSpdxID: Unknown, + }) + } } - return nil } func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFile, version string, report *Report) error { dep := filepath.Base(jarFile) + logger.Log.Debugf("Resolving the license of <%s> from jar\n", dep) + compressedJar, err := zip.OpenReader(jarFile) if err != nil { return err @@ -66,10 +74,7 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil for _, compressedFile := range compressedJar.File { archiveFile := compressedFile.Name switch { - case reHaveManifestFile.MatchString(archiveFile): - manifestFile = compressedFile - - case possibleLicenseFileName.MatchString(archiveFile): + case reMaybeLicense.MatchString(archiveFile): *state |= FoundLicenseInJarLicenseFile buf, err := 
resolver.ReadFileFromZip(compressedFile) if err != nil { @@ -77,6 +82,8 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil } return resolver.IdentifyLicense(config, jarFile, dep, buf.String(), version, report) + case reHaveManifestFile.MatchString(archiveFile): + manifestFile = compressedFile } } @@ -90,11 +97,15 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil r := reSearchLicenseInManifestFile.FindStringSubmatch(content) if len(r) != 0 { + lcs := strings.TrimSpace(r[1]) + if l, err := license.Identify(lcs, config.Threshold); err == nil { + lcs = l + } report.Resolve(&Result{ Dependency: dep, LicenseFilePath: jarFile, LicenseContent: strings.TrimSpace(r[1]), - LicenseSpdxID: strings.TrimSpace(r[1]), + LicenseSpdxID: lcs, Version: version, }) return nil @@ -123,16 +134,21 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile *zip.File) (*bytes.Buff } func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep, content, version string, report *Report) error { - identifier, err := license.Identify(content, config.Threshold) - if err != nil { - return err + contents := strings.Split(content, "[, \\s]+") + identifiers := make([]string, 0, len(contents)) + for _, c := range contents { + identifier, err := license.Identify(c, config.Threshold) + if err != nil { + return err + } + identifiers = append(identifiers, identifier) } report.Resolve(&Result{ Dependency: dep, LicenseFilePath: path, LicenseContent: content, - LicenseSpdxID: identifier, + LicenseSpdxID: strings.Join(identifiers, " and "), Version: version, }) return nil diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go index 23c6324..abd869f 100644 --- a/pkg/deps/maven.go +++ b/pkg/deps/maven.go @@ -32,6 +32,7 @@ import ( "golang.org/x/net/html/charset" "github.com/apache/skywalking-eyes/internal/logger" + "github.com/apache/skywalking-eyes/pkg/license" ) type MavenPomResolver struct { @@ -131,7 +132,7 @@ func (resolver 
*MavenPomResolver) LoadDependencies() ([]*Dependency, error) { cmd.Stdout = bufio.NewWriter(buf) cmd.Stderr = os.Stderr - logger.Log.Debugf("Run command: 「%v」, please wait", cmd.String()) + logger.Log.Debugf("Running command: [%v], please wait", cmd.String()) err := cmd.Run() if err != nil { return nil, err @@ -146,13 +147,15 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, config for _, dep := range deps { func() { for _, l := range config.Licenses { - if l.Name == fmt.Sprintf("%s:%s", dep.GroupID, dep.ArtifactID) && l.Version == dep.Version { - report.Resolve(&Result{ - Dependency: dep.Jar(), - LicenseSpdxID: l.License, - Version: dep.Version, - }) - return + for _, version := range strings.Split(l.Version, ",") { + if l.Name == fmt.Sprintf("%s:%s", strings.Join(dep.GroupID, "."), dep.ArtifactID) && version == dep.Version { + report.Resolve(&Result{ + Dependency: dep.Jar(), + LicenseSpdxID: l.License, + Version: dep.Version, + }) + return + } } } state := NotFound @@ -187,12 +190,14 @@ func (resolver *MavenPomResolver) ResolveLicenseFromPom(config *ConfigDeps, stat pom, err := resolver.ReadLicensesFromPom(pomFile) if err != nil { return err - } else if pom != nil && len(pom.Licenses) != 0 { + } + + if pom != nil && len(pom.Licenses) != 0 { report.Resolve(&Result{ Dependency: dep.Jar(), LicenseFilePath: pomFile, LicenseContent: pom.Raw(), - LicenseSpdxID: pom.AllLicenses(), + LicenseSpdxID: pom.AllLicenses(config), Version: dep.Version, }) @@ -266,7 +271,7 @@ loop: } var ( - reMaybeLicense = regexp.MustCompile(`(?i)licen[sc]e|copyright|copying`) + reMaybeLicense = regexp.MustCompile(`(?i)licen[sc]e|copyright|copying$`) reHaveManifestFile = regexp.MustCompile(`(?i)^(\S*/)?manifest\.MF$`) reSearchLicenseInManifestFile = regexp.MustCompile(`(?im)^.*?licen[cs]e.*?(http.+)`) ) @@ -310,18 +315,18 @@ func LoadDependenciesTree(data []byte) []*Dependency { stack := []Elem{} unique := make(map[string]struct{}) - reFind :=
regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) (\b.+):(\b.+):(\b.+):(\b.+):(\b.+)$`) + reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) (?P<gid>\b.+?):(?P<aid>\b.+?):(?P<packaging>\b.+)(:\b.+)?:(?P<version>\b.+):(?P<scope>\b.+?)$`) rawDeps := reFind.FindAllSubmatch(data, -1) deps := make([]*Dependency, 0, len(rawDeps)) for _, rawDep := range rawDeps { - gid := strings.Split(string(rawDep[3]), ".") + gid := strings.Split(string(rawDep[reFind.SubexpIndex("gid")]), ".") dep := &Dependency{ GroupID: gid, - ArtifactID: string(rawDep[4]), - Packaging: string(rawDep[5]), - Version: string(rawDep[6]), - Scope: string(rawDep[7]), + ArtifactID: string(rawDep[reFind.SubexpIndex("aid")]), + Packaging: string(rawDep[reFind.SubexpIndex("packaging")]), + Version: string(rawDep[reFind.SubexpIndex("version")]), + Scope: string(rawDep[reFind.SubexpIndex("scope")]), } if _, have := unique[dep.Path()]; have { @@ -391,7 +396,7 @@ func (s *State) String() string { m = append(m, "failed to resolve license content from manifest file found in jar") } - return strings.Join(m, "｜") + return strings.Join(m, " | ") } type Dependency struct { @@ -457,12 +462,12 @@ type PomFile struct { } // AllLicenses return all licenses found in pom.xml file -func (pom *PomFile) AllLicenses() string { +func (pom *PomFile) AllLicenses(config *ConfigDeps) string { licenses := []string{} for _, l := range pom.Licenses { - licenses = append(licenses, l.Item()) + licenses = append(licenses, l.Item(config)) } - return strings.Join(licenses, ", ") + return strings.Join(licenses, " and ") } // Raw return raw data @@ -481,14 +486,23 @@ type XMLLicense struct { Comments string `xml:"comments,omitempty"` } -func (l *XMLLicense) Item() string { - return GetLicenseFromURL(l.URL) +func (l *XMLLicense) Item(config *ConfigDeps) string { + if l.URL != "" { + return GetLicenseFromURL(l.URL, config) + } + if l.Name != "" { + return l.Name + } + return l.URL } func (l *XMLLicense) Raw() string { return
fmt.Sprintf(`License: {Name: %s, URL: %s, Distribution: %s, Comments: %s, }`, l.Name, l.URL, l.Distribution, l.Comments) } -func GetLicenseFromURL(url string) string { +func GetLicenseFromURL(url string, config *ConfigDeps) string { + if l, err := license.Identify(url, config.Threshold); err == nil { + return l + } return url } diff --git a/pkg/deps/npm.go b/pkg/deps/npm.go index 3fc1a0c..4cdb28f 100644 --- a/pkg/deps/npm.go +++ b/pkg/deps/npm.go @@ -212,9 +212,11 @@ func (resolver *NpmResolver) ResolvePkgFile(result *Result, pkgPath string, lice result.Version = packageInfo.Version for _, l := range licenses { - if l.Name == packageInfo.Name && l.Version == packageInfo.Version { - result.LicenseSpdxID = l.License - return nil + for _, version := range strings.Split(l.Version, ",") { + if l.Name == packageInfo.Name && version == packageInfo.Version { + result.LicenseSpdxID = l.License + return nil + } } } @@ -286,9 +288,11 @@ func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, conf return nil } for _, l := range config.Licenses { - if l.Name == info.Name() && l.Version == result.Version { - result.LicenseSpdxID = l.License - return nil + for _, version := range strings.Split(l.Version, ",") { + if l.Name == info.Name() && version == result.Version { + result.LicenseSpdxID = l.License + return nil + } } } identifier, err := license.Identify(string(content), config.Threshold) diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go index b48eb90..8769d79 100644 --- a/pkg/deps/resolve.go +++ b/pkg/deps/resolve.go @@ -30,6 +30,7 @@ var Resolvers = []Resolver{ new(GoModResolver), new(NpmResolver), new(MavenPomResolver), + new(JarResolver), } func Resolve(config *ConfigDeps, report *Report) error { diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go index 5c42cf3..05f3192 100644 --- a/pkg/license/identifier.go +++ b/pkg/license/identifier.go @@ -24,6 +24,7 @@ import ( "sync" "github.com/google/licensecheck" + "gopkg.in/yaml.v3" 
"github.com/apache/skywalking-eyes/assets" "github.com/apache/skywalking-eyes/internal/logger" @@ -40,8 +41,24 @@ var ( // It will be initialized once. func scanner() *licensecheck.Scanner { scannerOnce.Do(func() { + licenses := licensecheck.BuiltinLicenses() + if bs, err := assets.Asset("urls.yaml"); err == nil { + licenseURLs := make(map[string][]string) + if err := yaml.Unmarshal(bs, &licenseURLs); err == nil { + logger.Log.Debug("license URLs:", licenseURLs) + for id, urls := range licenseURLs { + for _, url := range urls { + licenses = append(licenses, licensecheck.License{ + ID: id, + URL: url, + Type: licensecheck.Unknown, + }) + } + } + } + } var err error - _scanner, err = licensecheck.NewScanner(licensecheck.BuiltinLicenses()) + _scanner, err = licensecheck.NewScanner(licenses) if err != nil { logger.Log.Fatalf("licensecheck.NewScanner: %v", err) } @@ -57,10 +74,17 @@ func Identify(content string, threshold int) (string, error) { return "", fmt.Errorf("cannot identify the license, coverage: %.1f%%", coverage.Percent) } + seen := make(map[string]bool) + var sb strings.Builder sb.WriteString(coverage.Match[0].ID) + seen[coverage.Match[0].ID] = true for i := 1; i < len(coverage.Match); i++ { + if seen[coverage.Match[i].ID] { + continue + } + seen[coverage.Match[i].ID] = true sb.WriteString(" and ") sb.WriteString(coverage.Match[i].ID) } diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go index ae5b666..7ac706f 100644 --- a/pkg/license/identifier_test.go +++ b/pkg/license/identifier_test.go @@ -323,6 +323,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
`, want: "BSD-3-Clause", }, + { + name: "URL http://www.apache.org/licenses/LICENSE-2.0", + content: "http://www.apache.org/licenses/LICENSE-2.0", + want: "Apache-2.0", + }, + { + name: "URL https://opensource.org/licenses/Apache-2.0", + content: "https://opensource.org/licenses/Apache-2.0", + want: "Apache-2.0", + }, + { + name: "URL http://www.apache.org/licenses/LICENSE-2.0.txt", + content: "http://www.apache.org/licenses/LICENSE-2.0.txt", + want: "Apache-2.0", + }, + { + name: "URL https://www.bouncycastle.org/licence.html", + content: "https://www.bouncycastle.org/licence.html", + want: "MIT", + }, + { + name: "URL https://www.bouncycastle.org/licence.html", + content: "http://www.gnu.org/software/classpath/license.html, http://opensource.org/licenses/MIT", + want: "GPL-2.0-with-classpath-exception and MIT", + }, + { + name: "URL", + content: "http://www.mozilla.org/MPL/MPL-1.1.html, http://www.gnu.org/licenses/lgpl-2.1.html", + want: "MPL-1.1 and LGPL-2.1", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {
