This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch urls
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git

commit 5ce89049923855a55ca8541a4b4639615a3b5bba
Author: kezhenxu94 <[email protected]>
AuthorDate: Sun May 22 16:29:38 2022 +0800

    Add custom license urls for identification
---
 assets/urls.yaml               | 57 ++++++++++++++++++++++++++++++++++++++
 pkg/deps/golang.go             | 17 +++++++-----
 pkg/deps/jar.go                | 58 +++++++++++++++++++++++++--------------
 pkg/deps/maven.go              | 62 ++++++++++++++++++++++++++----------------
 pkg/deps/npm.go                | 16 +++++++----
 pkg/deps/resolve.go            |  1 +
 pkg/license/identifier.go      | 26 +++++++++++++++++-
 pkg/license/identifier_test.go | 30 ++++++++++++++++++++
 8 files changed, 208 insertions(+), 59 deletions(-)

diff --git a/assets/urls.yaml b/assets/urls.yaml
new file mode 100644
index 0000000..c520a03
--- /dev/null
+++ b/assets/urls.yaml
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+Apache-2.0:
+  - apache.org/licenses/license-2.0
+  - apache.org/licenses/license-2.0.txt
+  - apache.org/license/license-2.0
+  - apache.org/license/license-2.0.txt
+  - www.apache.org/licenses/license-2.0.txt
+  - opensource.org/licenses/apache-2.0
+  - www.apache.org/license/LICENSE-2.0.txt
+
+MIT:
+  - bouncycastle.org/licence
+  - opensource.org/licenses/mit-license.php
+  - opensource.org/licenses/mit
+  - www.bouncycastle.org/licence.html
+  - www.opensource.org/licenses/mit-license.php
+
+LGPL-2.1:
+  - gnu.org/licenses/old-licenses/lgpl-2.1
+  - www.gnu.org/licenses/old-licenses/lgpl-2.1.html
+  - www.gnu.org/licenses/lgpl-2.1.html
+
+GPL-2.0-with-classpath-exception:
+  - gnu.org/software/classpath/license
+  - www.gnu.org/software/classpath/license.html
+
+EPL-1.0:
+  - eclipse.org/legal/epl-v10
+  - www.eclipse.org/legal/epl-v10.html
+
+BSD-2-Clause:
+  - opensource.org/licenses/BSD-2-Clause
+
+BSD-3-Clause:
+  - opensource.org/licenses/BSD-3-Clause
+
+CC0-1.0:
+  - creativecommons.org/publicdomain/zero/1.0/
+
+MPL-1.1:
+  - www.mozilla.org/MPL/MPL-1.1.html
diff --git a/pkg/deps/golang.go b/pkg/deps/golang.go
index 7483005..842be06 100644
--- a/pkg/deps/golang.go
+++ b/pkg/deps/golang.go
@@ -27,6 +27,7 @@ import (
        "os/exec"
        "path/filepath"
        "regexp"
+       "strings"
 
        "github.com/apache/skywalking-eyes/internal/logger"
        "github.com/apache/skywalking-eyes/pkg/license"
@@ -86,13 +87,15 @@ func (resolver *GoModResolver) ResolvePackages(modules 
[]*packages.Module, confi
        for _, module := range modules {
                func() {
                        for _, l := range config.Licenses {
-                               if l.Name == module.Path && l.Version == 
module.Version {
-                                       report.Resolve(&Result{
-                                               Dependency:    module.Path,
-                                               LicenseSpdxID: l.License,
-                                               Version:       module.Version,
-                                       })
-                                       return
+                               for _, version := range 
strings.Split(l.Version, ",") {
+                                       if l.Name == module.Path && version == 
module.Version {
+                                               report.Resolve(&Result{
+                                                       Dependency:    
module.Path,
+                                                       LicenseSpdxID: 
l.License,
+                                                       Version:       
module.Version,
+                                               })
+                                               return
+                                       }
                                }
                        }
                        err := resolver.ResolvePackageLicense(config, module, 
report)
diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go
index 3c56188..1e0c774 100644
--- a/pkg/deps/jar.go
+++ b/pkg/deps/jar.go
@@ -29,31 +29,39 @@ import (
 
        "github.com/apache/skywalking-eyes/internal/logger"
        "github.com/apache/skywalking-eyes/pkg/license"
+       "github.com/bmatcuk/doublestar/v2"
 )
 
 type JarResolver struct{}
 
-func (resolver *JarResolver) CanResolve(jarFile string) bool {
-       return filepath.Ext(jarFile) == ".jar"
+func (resolver *JarResolver) CanResolve(jarFiles string) bool {
+       return strings.HasSuffix(jarFiles, ".jar")
 }
 
-func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps, 
report *Report) error {
-       state := NotFound
-       if err := resolver.ResolveJar(config, &state, jarFile, Unknown, 
report); err != nil {
-               dep := filepath.Base(jarFile)
-               logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", 
dep, state.String())
-               report.Skip(&Result{
-                       Dependency:    dep,
-                       LicenseSpdxID: Unknown,
-               })
+func (resolver *JarResolver) Resolve(jarFiles string, config *ConfigDeps, 
report *Report) error {
+       fs, err := doublestar.Glob(jarFiles)
+       if err != nil {
+               return err
+       }
+       for _, jarFile := range fs {
+               state := NotFound
+               if err := resolver.ResolveJar(config, &state, jarFile, Unknown, 
report); err != nil {
+                       dep := filepath.Base(jarFile)
+                       logger.Log.Warnf("Failed to resolve the license of 
<%s>: %v\n", dep, state.String())
+                       report.Skip(&Result{
+                               Dependency:    dep,
+                               LicenseSpdxID: Unknown,
+                       })
+               }
        }
-
        return nil
 }
 
 func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, 
jarFile, version string, report *Report) error {
        dep := filepath.Base(jarFile)
 
+       logger.Log.Debugf("Resolving the license of <%s> from jar\n", dep)
+
        compressedJar, err := zip.OpenReader(jarFile)
        if err != nil {
                return err
@@ -66,10 +74,7 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, 
state *State, jarFil
        for _, compressedFile := range compressedJar.File {
                archiveFile := compressedFile.Name
                switch {
-               case reHaveManifestFile.MatchString(archiveFile):
-                       manifestFile = compressedFile
-
-               case possibleLicenseFileName.MatchString(archiveFile):
+               case reMaybeLicense.MatchString(archiveFile):
                        *state |= FoundLicenseInJarLicenseFile
                        buf, err := resolver.ReadFileFromZip(compressedFile)
                        if err != nil {
@@ -77,6 +82,8 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, 
state *State, jarFil
                        }
 
                        return resolver.IdentifyLicense(config, jarFile, dep, 
buf.String(), version, report)
+               case reHaveManifestFile.MatchString(archiveFile):
+                       manifestFile = compressedFile
                }
        }
 
@@ -90,11 +97,15 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, 
state *State, jarFil
 
                r := reSearchLicenseInManifestFile.FindStringSubmatch(content)
                if len(r) != 0 {
+                       lcs := strings.TrimSpace(r[1])
+                       if l, err := license.Identify(lcs, config.Threshold); 
err == nil {
+                               lcs = l
+                       }
                        report.Resolve(&Result{
                                Dependency:      dep,
                                LicenseFilePath: jarFile,
                                LicenseContent:  strings.TrimSpace(r[1]),
-                               LicenseSpdxID:   strings.TrimSpace(r[1]),
+                               LicenseSpdxID:   lcs,
                                Version:         version,
                        })
                        return nil
@@ -123,16 +134,21 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile 
*zip.File) (*bytes.Buff
 }
 
 func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep, 
content, version string, report *Report) error {
-       identifier, err := license.Identify(content, config.Threshold)
-       if err != nil {
-               return err
+       contents := strings.Split(content, "[, \\s]+")
+       identifiers := make([]string, 0, len(contents))
+       for _, c := range contents {
+               identifier, err := license.Identify(c, config.Threshold)
+               if err != nil {
+                       return err
+               }
+               identifiers = append(identifiers, identifier)
        }
 
        report.Resolve(&Result{
                Dependency:      dep,
                LicenseFilePath: path,
                LicenseContent:  content,
-               LicenseSpdxID:   identifier,
+               LicenseSpdxID:   strings.Join(identifiers, " and "),
                Version:         version,
        })
        return nil
diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go
index 23c6324..abd869f 100644
--- a/pkg/deps/maven.go
+++ b/pkg/deps/maven.go
@@ -32,6 +32,7 @@ import (
        "golang.org/x/net/html/charset"
 
        "github.com/apache/skywalking-eyes/internal/logger"
+       "github.com/apache/skywalking-eyes/pkg/license"
 )
 
 type MavenPomResolver struct {
@@ -131,7 +132,7 @@ func (resolver *MavenPomResolver) LoadDependencies() 
([]*Dependency, error) {
        cmd.Stdout = bufio.NewWriter(buf)
        cmd.Stderr = os.Stderr
 
-       logger.Log.Debugf("Run command: 「%v」, please wait", cmd.String())
+       logger.Log.Debugf("Running command: [%v], please wait", cmd.String())
        err := cmd.Run()
        if err != nil {
                return nil, err
@@ -146,13 +147,15 @@ func (resolver *MavenPomResolver) 
ResolveDependencies(deps []*Dependency, config
        for _, dep := range deps {
                func() {
                        for _, l := range config.Licenses {
-                               if l.Name == fmt.Sprintf("%s:%s", dep.GroupID, 
dep.ArtifactID) && l.Version == dep.Version {
-                                       report.Resolve(&Result{
-                                               Dependency:    dep.Jar(),
-                                               LicenseSpdxID: l.License,
-                                               Version:       dep.Version,
-                                       })
-                                       return
+                               for _, version := range 
strings.Split(l.Version, ",") {
+                                       if l.Name == fmt.Sprintf("%s:%s", 
strings.Join(dep.GroupID, "."), dep.ArtifactID) && version == dep.Version {
+                                               report.Resolve(&Result{
+                                                       Dependency:    
dep.Jar(),
+                                                       LicenseSpdxID: 
l.License,
+                                                       Version:       
dep.Version,
+                                               })
+                                               return
+                                       }
                                }
                        }
                        state := NotFound
@@ -187,12 +190,14 @@ func (resolver *MavenPomResolver) 
ResolveLicenseFromPom(config *ConfigDeps, stat
        pom, err := resolver.ReadLicensesFromPom(pomFile)
        if err != nil {
                return err
-       } else if pom != nil && len(pom.Licenses) != 0 {
+       }
+
+       if pom != nil && len(pom.Licenses) != 0 {
                report.Resolve(&Result{
                        Dependency:      dep.Jar(),
                        LicenseFilePath: pomFile,
                        LicenseContent:  pom.Raw(),
-                       LicenseSpdxID:   pom.AllLicenses(),
+                       LicenseSpdxID:   pom.AllLicenses(config),
                        Version:         dep.Version,
                })
 
@@ -266,7 +271,7 @@ loop:
 }
 
 var (
-       reMaybeLicense                = 
regexp.MustCompile(`(?i)licen[sc]e|copyright|copying`)
+       reMaybeLicense                = 
regexp.MustCompile(`(?i)licen[sc]e|copyright|copying$`)
        reHaveManifestFile            = 
regexp.MustCompile(`(?i)^(\S*/)?manifest\.MF$`)
        reSearchLicenseInManifestFile = 
regexp.MustCompile(`(?im)^.*?licen[cs]e.*?(http.+)`)
 )
@@ -310,18 +315,18 @@ func LoadDependenciesTree(data []byte) []*Dependency {
        stack := []Elem{}
        unique := make(map[string]struct{})
 
-       reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) 
(\b.+):(\b.+):(\b.+):(\b.+):(\b.+)$`)
+       reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) 
(?P<gid>\b.+?):(?P<aid>\b.+?):(?P<packaging>\b.+)(:\b.+)?:(?P<version>\b.+):(?P<scope>\b.+?)$`)
        rawDeps := reFind.FindAllSubmatch(data, -1)
 
        deps := make([]*Dependency, 0, len(rawDeps))
        for _, rawDep := range rawDeps {
-               gid := strings.Split(string(rawDep[3]), ".")
+               gid := strings.Split(string(rawDep[reFind.SubexpIndex("gid")]), 
".")
                dep := &Dependency{
                        GroupID:    gid,
-                       ArtifactID: string(rawDep[4]),
-                       Packaging:  string(rawDep[5]),
-                       Version:    string(rawDep[6]),
-                       Scope:      string(rawDep[7]),
+                       ArtifactID: string(rawDep[reFind.SubexpIndex("aid")]),
+                       Packaging:  
string(rawDep[reFind.SubexpIndex("packaging")]),
+                       Version:    
string(rawDep[reFind.SubexpIndex("version")]),
+                       Scope:      string(rawDep[reFind.SubexpIndex("scope")]),
                }
 
                if _, have := unique[dep.Path()]; have {
@@ -391,7 +396,7 @@ func (s *State) String() string {
                m = append(m, "failed to resolve license content from manifest 
file found in jar")
        }
 
-       return strings.Join(m, "|")
+       return strings.Join(m, " | ")
 }
 
 type Dependency struct {
@@ -457,12 +462,12 @@ type PomFile struct {
 }
 
 // AllLicenses return all licenses found in pom.xml file
-func (pom *PomFile) AllLicenses() string {
+func (pom *PomFile) AllLicenses(config *ConfigDeps) string {
        licenses := []string{}
        for _, l := range pom.Licenses {
-               licenses = append(licenses, l.Item())
+               licenses = append(licenses, l.Item(config))
        }
-       return strings.Join(licenses, ", ")
+       return strings.Join(licenses, " and ")
 }
 
 // Raw return raw data
@@ -481,14 +486,23 @@ type XMLLicense struct {
        Comments     string `xml:"comments,omitempty"`
 }
 
-func (l *XMLLicense) Item() string {
-       return GetLicenseFromURL(l.URL)
+func (l *XMLLicense) Item(config *ConfigDeps) string {
+       if l.URL != "" {
+               return GetLicenseFromURL(l.URL, config)
+       }
+       if l.Name != "" {
+               return l.Name
+       }
+       return l.URL
 }
 
 func (l *XMLLicense) Raw() string {
        return fmt.Sprintf(`License: {Name: %s, URL: %s, Distribution: %s, 
Comments: %s, }`, l.Name, l.URL, l.Distribution, l.Comments)
 }
 
-func GetLicenseFromURL(url string) string {
+func GetLicenseFromURL(url string, config *ConfigDeps) string {
+       if l, err := license.Identify(url, config.Threshold); err == nil {
+               return l
+       }
        return url
 }
diff --git a/pkg/deps/npm.go b/pkg/deps/npm.go
index 3fc1a0c..4cdb28f 100644
--- a/pkg/deps/npm.go
+++ b/pkg/deps/npm.go
@@ -212,9 +212,11 @@ func (resolver *NpmResolver) ResolvePkgFile(result 
*Result, pkgPath string, lice
 
        result.Version = packageInfo.Version
        for _, l := range licenses {
-               if l.Name == packageInfo.Name && l.Version == 
packageInfo.Version {
-                       result.LicenseSpdxID = l.License
-                       return nil
+               for _, version := range strings.Split(l.Version, ",") {
+                       if l.Name == packageInfo.Name && version == 
packageInfo.Version {
+                               result.LicenseSpdxID = l.License
+                               return nil
+                       }
                }
        }
 
@@ -286,9 +288,11 @@ func (resolver *NpmResolver) ResolveLcsFile(result 
*Result, pkgPath string, conf
                        return nil
                }
                for _, l := range config.Licenses {
-                       if l.Name == info.Name() && l.Version == result.Version 
{
-                               result.LicenseSpdxID = l.License
-                               return nil
+                       for _, version := range strings.Split(l.Version, ",") {
+                               if l.Name == info.Name() && version == 
result.Version {
+                                       result.LicenseSpdxID = l.License
+                                       return nil
+                               }
                        }
                }
                identifier, err := license.Identify(string(content), 
config.Threshold)
diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go
index b48eb90..8769d79 100644
--- a/pkg/deps/resolve.go
+++ b/pkg/deps/resolve.go
@@ -30,6 +30,7 @@ var Resolvers = []Resolver{
        new(GoModResolver),
        new(NpmResolver),
        new(MavenPomResolver),
+       new(JarResolver),
 }
 
 func Resolve(config *ConfigDeps, report *Report) error {
diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go
index 5c42cf3..d12376b 100644
--- a/pkg/license/identifier.go
+++ b/pkg/license/identifier.go
@@ -24,6 +24,7 @@ import (
        "sync"
 
        "github.com/google/licensecheck"
+       "gopkg.in/yaml.v3"
 
        "github.com/apache/skywalking-eyes/assets"
        "github.com/apache/skywalking-eyes/internal/logger"
@@ -40,8 +41,24 @@ var (
 // It will be initialized once.
 func scanner() *licensecheck.Scanner {
        scannerOnce.Do(func() {
+               licenses := licensecheck.BuiltinLicenses()
+               if bs, err := assets.Asset("urls.yaml"); err == nil {
+                       licenseURLs := make(map[string][]string)
+                       if err := yaml.Unmarshal(bs, &licenseURLs); err == nil {
+                               logger.Log.Debug("license URLs:", licenseURLs)
+                               for id, urls := range licenseURLs {
+                                       for _, url := range urls {
+                                               licenses = append(licenses, 
licensecheck.License{
+                                                       ID:   id,
+                                                       URL:  
strings.ToLower(url),
+                                                       Type: 
licensecheck.Unknown,
+                                               })
+                                       }
+                               }
+                       }
+               }
                var err error
-               _scanner, err = 
licensecheck.NewScanner(licensecheck.BuiltinLicenses())
+               _scanner, err = licensecheck.NewScanner(licenses)
                if err != nil {
                        logger.Log.Fatalf("licensecheck.NewScanner: %v", err)
                }
@@ -57,10 +74,17 @@ func Identify(content string, threshold int) (string, 
error) {
                return "", fmt.Errorf("cannot identify the license, coverage: 
%.1f%%", coverage.Percent)
        }
 
+       seen := make(map[string]bool)
+
        var sb strings.Builder
        sb.WriteString(coverage.Match[0].ID)
+       seen[coverage.Match[0].ID] = true
 
        for i := 1; i < len(coverage.Match); i++ {
+               if seen[coverage.Match[i].ID] {
+                       continue
+               }
+               seen[coverage.Match[i].ID] = true
                sb.WriteString(" and ")
                sb.WriteString(coverage.Match[i].ID)
        }
diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go
index ae5b666..7ac706f 100644
--- a/pkg/license/identifier_test.go
+++ b/pkg/license/identifier_test.go
@@ -323,6 +323,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
SUCH DAMAGE.
 `,
                        want: "BSD-3-Clause",
                },
+               {
+                       name:    "URL 
http://www.apache.org/licenses/LICENSE-2.0",
+                       content: "http://www.apache.org/licenses/LICENSE-2.0",
+                       want:    "Apache-2.0",
+               },
+               {
+                       name:    "URL 
https://opensource.org/licenses/Apache-2.0",
+                       content: "https://opensource.org/licenses/Apache-2.0",
+                       want:    "Apache-2.0",
+               },
+               {
+                       name:    "URL 
http://www.apache.org/licenses/LICENSE-2.0.txt",
+                       content: 
"http://www.apache.org/licenses/LICENSE-2.0.txt",
+                       want:    "Apache-2.0",
+               },
+               {
+                       name:    "URL 
https://www.bouncycastle.org/licence.html",
+                       content: "https://www.bouncycastle.org/licence.html",
+                       want:    "MIT",
+               },
+               {
+                       name:    "URL 
https://www.bouncycastle.org/licence.html",
+                       content: 
"http://www.gnu.org/software/classpath/license.html, 
http://opensource.org/licenses/MIT",
+                       want:    "GPL-2.0-with-classpath-exception and MIT",
+               },
+               {
+                       name:    "URL",
+                       content: "http://www.mozilla.org/MPL/MPL-1.1.html, 
http://www.gnu.org/licenses/lgpl-2.1.html",
+                       want:    "MPL-1.1 and LGPL-2.1",
+               },
        }
        for _, tt := range tests {
                t.Run(tt.name, func(t *testing.T) {

Reply via email to