This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch urls
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git

commit 45480e06615123ca096ad2fe7ae0970b1558e6df
Author: kezhenxu94 <[email protected]>
AuthorDate: Sun May 22 16:29:38 2022 +0800

    Add custom license urls for identification
---
 README.md                      |  2 +-
 assets/urls.yaml               | 18 +++++++++++++
 go.mod                         |  2 ++
 go.sum                         |  4 +--
 pkg/deps/jar.go                | 58 +++++++++++++++++++++++++++---------------
 pkg/deps/maven.go              | 48 +++++++++++++++++++++-------------
 pkg/deps/resolve.go            |  1 +
 pkg/license/identifier.go      | 26 ++++++++++++++++++-
 pkg/license/identifier_test.go | 30 ++++++++++++++++++++++
 9 files changed, 146 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 49dd4cb..ef366de 100644
--- a/README.md
+++ b/README.md
@@ -788,7 +788,7 @@ dependency: # <15>
 15. The `dependency` section is configurations for resolving dependencies' 
licenses.
 16. The `files` are the files that declare the dependencies of a project, 
typically, `go.mod` in Go project, `pom.xml` in maven project, and 
`package.json` in NodeJS project. If it's a relative path, it's relative to the 
`.licenserc.yaml`.
 17. Declare the licenses which cannot be identified by this tool.
-18. The `name` of the dependency, The name is different for different 
projects, `PackagePath` in Go project, `GroupID:ArtifactID` in maven project, 
`PackageName` in NodeJS project.
+18. The `name` of the dependency, The name is different for different 
projects, `PackagePath` in Go project, `.jar` file in maven project, 
`PackageName` in NodeJS project.
 19. The `version` of the dependency, it's locked, preventing license changed 
between different versions.
 20. The [SPDX ID](https://spdx.org/licenses/) of the dependency license.
 21. The minimum percentage of the file that must contain license text for 
identifying a license, default is `75`.
diff --git a/assets/urls.yaml b/assets/urls.yaml
new file mode 100644
index 0000000..8af29b0
--- /dev/null
+++ b/assets/urls.yaml
@@ -0,0 +1,18 @@
+Apache-2.0:
+  - apache.org/licenses/license-2.0
+  - apache.org/licenses/license-2.0.txt
+  - apache.org/license/license-2.0
+  - apache.org/license/license-2.0.txt
+
+MIT:
+  - bouncycastle.org/licence
+  - opensource.org/licenses/mit-license.php
+
+LGPL-2.1:
+  - gnu.org/licenses/old-licenses/lgpl-2.1
+
+GPL-2.0-with-classpath-exception:
+  - gnu.org/software/classpath/license
+
+EPL-1.0:
+  - eclipse.org/legal/epl-v10
diff --git a/go.mod b/go.mod
index 15ff201..fb294bb 100644
--- a/go.mod
+++ b/go.mod
@@ -21,3 +21,5 @@ require (
        google.golang.org/protobuf v1.28.0 // indirect
        gopkg.in/yaml.v3 v3.0.0-20220512140231-539c8e751b99
 )
+
+replace github.com/google/licensecheck v0.3.1 => 
github.com/kezhenxu94/licensecheck v0.3.2-0.20220522082511-5121cbc45274
diff --git a/go.sum b/go.sum
index ae2f5ee..09dd6b4 100644
--- a/go.sum
+++ b/go.sum
@@ -96,8 +96,6 @@ github.com/google/go-github/v33 v33.0.0/go.mod 
h1:GMdDnVZY/2TsWgp/lkYnpSAh6TrzhA
 github.com/google/go-querystring v1.0.0/go.mod 
h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
 github.com/google/go-querystring v1.1.0 
h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8=
 github.com/google/go-querystring v1.1.0/go.mod 
h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU=
-github.com/google/licensecheck v0.3.1 
h1:QoxgoDkaeC4nFrtGN1jV7IPmDCHFNIVh54e5hSt6sPs=
-github.com/google/licensecheck v0.3.1/go.mod 
h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY=
 github.com/google/martian v2.1.0+incompatible/go.mod 
h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
 github.com/google/martian/v3 v3.0.0/go.mod 
h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
 github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod 
h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
@@ -117,6 +115,8 @@ github.com/inconshreveable/mousetrap v1.0.0 
h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH
 github.com/inconshreveable/mousetrap v1.0.0/go.mod 
h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod 
h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
 github.com/jstemmer/go-junit-report v0.9.1/go.mod 
h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
+github.com/kezhenxu94/licensecheck v0.3.2-0.20220522082511-5121cbc45274 
h1:70F5y4oVIT9OoQ61ujeWgycYonXqW6kD6Z0LLi3V73c=
+github.com/kezhenxu94/licensecheck v0.3.2-0.20220522082511-5121cbc45274/go.mod 
h1:ORkR35t/JjW+emNKtfJDII0zlciG9JgbT7SmsohlHmY=
 github.com/kisielk/gotool v1.0.0/go.mod 
h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
 github.com/kr/pretty v0.1.0/go.mod 
h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go
index 3c56188..1e0c774 100644
--- a/pkg/deps/jar.go
+++ b/pkg/deps/jar.go
@@ -29,31 +29,39 @@ import (
 
        "github.com/apache/skywalking-eyes/internal/logger"
        "github.com/apache/skywalking-eyes/pkg/license"
+       "github.com/bmatcuk/doublestar/v2"
 )
 
 type JarResolver struct{}
 
-func (resolver *JarResolver) CanResolve(jarFile string) bool {
-       return filepath.Ext(jarFile) == ".jar"
+func (resolver *JarResolver) CanResolve(jarFiles string) bool {
+       return strings.HasSuffix(jarFiles, ".jar")
 }
 
-func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps, 
report *Report) error {
-       state := NotFound
-       if err := resolver.ResolveJar(config, &state, jarFile, Unknown, 
report); err != nil {
-               dep := filepath.Base(jarFile)
-               logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", 
dep, state.String())
-               report.Skip(&Result{
-                       Dependency:    dep,
-                       LicenseSpdxID: Unknown,
-               })
+func (resolver *JarResolver) Resolve(jarFiles string, config *ConfigDeps, 
report *Report) error {
+       fs, err := doublestar.Glob(jarFiles)
+       if err != nil {
+               return err
+       }
+       for _, jarFile := range fs {
+               state := NotFound
+               if err := resolver.ResolveJar(config, &state, jarFile, Unknown, 
report); err != nil {
+                       dep := filepath.Base(jarFile)
+                       logger.Log.Warnf("Failed to resolve the license of 
<%s>: %v\n", dep, state.String())
+                       report.Skip(&Result{
+                               Dependency:    dep,
+                               LicenseSpdxID: Unknown,
+                       })
+               }
        }
-
        return nil
 }
 
 func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, 
jarFile, version string, report *Report) error {
        dep := filepath.Base(jarFile)
 
+       logger.Log.Debugf("Resolving the license of <%s> from jar\n", dep)
+
        compressedJar, err := zip.OpenReader(jarFile)
        if err != nil {
                return err
@@ -66,10 +74,7 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, 
state *State, jarFil
        for _, compressedFile := range compressedJar.File {
                archiveFile := compressedFile.Name
                switch {
-               case reHaveManifestFile.MatchString(archiveFile):
-                       manifestFile = compressedFile
-
-               case possibleLicenseFileName.MatchString(archiveFile):
+               case reMaybeLicense.MatchString(archiveFile):
                        *state |= FoundLicenseInJarLicenseFile
                        buf, err := resolver.ReadFileFromZip(compressedFile)
                        if err != nil {
@@ -77,6 +82,8 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, 
state *State, jarFil
                        }
 
                        return resolver.IdentifyLicense(config, jarFile, dep, 
buf.String(), version, report)
+               case reHaveManifestFile.MatchString(archiveFile):
+                       manifestFile = compressedFile
                }
        }
 
@@ -90,11 +97,15 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, 
state *State, jarFil
 
                r := reSearchLicenseInManifestFile.FindStringSubmatch(content)
                if len(r) != 0 {
+                       lcs := strings.TrimSpace(r[1])
+                       if l, err := license.Identify(lcs, config.Threshold); 
err == nil {
+                               lcs = l
+                       }
                        report.Resolve(&Result{
                                Dependency:      dep,
                                LicenseFilePath: jarFile,
                                LicenseContent:  strings.TrimSpace(r[1]),
-                               LicenseSpdxID:   strings.TrimSpace(r[1]),
+                               LicenseSpdxID:   lcs,
                                Version:         version,
                        })
                        return nil
@@ -123,16 +134,21 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile 
*zip.File) (*bytes.Buff
 }
 
 func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep, 
content, version string, report *Report) error {
-       identifier, err := license.Identify(content, config.Threshold)
-       if err != nil {
-               return err
+       contents := strings.Split(content, "[, \\s]+")
+       identifiers := make([]string, 0, len(contents))
+       for _, c := range contents {
+               identifier, err := license.Identify(c, config.Threshold)
+               if err != nil {
+                       return err
+               }
+               identifiers = append(identifiers, identifier)
        }
 
        report.Resolve(&Result{
                Dependency:      dep,
                LicenseFilePath: path,
                LicenseContent:  content,
-               LicenseSpdxID:   identifier,
+               LicenseSpdxID:   strings.Join(identifiers, " and "),
                Version:         version,
        })
        return nil
diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go
index 23c6324..b025d70 100644
--- a/pkg/deps/maven.go
+++ b/pkg/deps/maven.go
@@ -32,6 +32,7 @@ import (
        "golang.org/x/net/html/charset"
 
        "github.com/apache/skywalking-eyes/internal/logger"
+       "github.com/apache/skywalking-eyes/pkg/license"
 )
 
 type MavenPomResolver struct {
@@ -131,7 +132,7 @@ func (resolver *MavenPomResolver) LoadDependencies() 
([]*Dependency, error) {
        cmd.Stdout = bufio.NewWriter(buf)
        cmd.Stderr = os.Stderr
 
-       logger.Log.Debugf("Run command: 「%v」, please wait", cmd.String())
+       logger.Log.Debugf("Running command: [%v], please wait", cmd.String())
        err := cmd.Run()
        if err != nil {
                return nil, err
@@ -146,7 +147,7 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps 
[]*Dependency, config
        for _, dep := range deps {
                func() {
                        for _, l := range config.Licenses {
-                               if l.Name == fmt.Sprintf("%s:%s", dep.GroupID, 
dep.ArtifactID) && l.Version == dep.Version {
+                               if l.Name == fmt.Sprintf("%s:%s", 
strings.Join(dep.GroupID, "."), dep.ArtifactID) && l.Version == dep.Version {
                                        report.Resolve(&Result{
                                                Dependency:    dep.Jar(),
                                                LicenseSpdxID: l.License,
@@ -187,12 +188,14 @@ func (resolver *MavenPomResolver) 
ResolveLicenseFromPom(config *ConfigDeps, stat
        pom, err := resolver.ReadLicensesFromPom(pomFile)
        if err != nil {
                return err
-       } else if pom != nil && len(pom.Licenses) != 0 {
+       }
+
+       if pom != nil && len(pom.Licenses) != 0 {
                report.Resolve(&Result{
                        Dependency:      dep.Jar(),
                        LicenseFilePath: pomFile,
                        LicenseContent:  pom.Raw(),
-                       LicenseSpdxID:   pom.AllLicenses(),
+                       LicenseSpdxID:   pom.AllLicenses(config),
                        Version:         dep.Version,
                })
 
@@ -266,7 +269,7 @@ loop:
 }
 
 var (
-       reMaybeLicense                = 
regexp.MustCompile(`(?i)licen[sc]e|copyright|copying`)
+       reMaybeLicense                = 
regexp.MustCompile(`(?i)licen[sc]e|copyright|copying$`)
        reHaveManifestFile            = 
regexp.MustCompile(`(?i)^(\S*/)?manifest\.MF$`)
        reSearchLicenseInManifestFile = 
regexp.MustCompile(`(?im)^.*?licen[cs]e.*?(http.+)`)
 )
@@ -310,18 +313,18 @@ func LoadDependenciesTree(data []byte) []*Dependency {
        stack := []Elem{}
        unique := make(map[string]struct{})
 
-       reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) 
(\b.+):(\b.+):(\b.+):(\b.+):(\b.+)$`)
+       reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) 
(?P<gid>\b.+?):(?P<aid>\b.+?):(?P<packaging>\b.+)(:\b.+)?:(?P<version>\b.+):(?P<scope>\b.+?)$`)
        rawDeps := reFind.FindAllSubmatch(data, -1)
 
        deps := make([]*Dependency, 0, len(rawDeps))
        for _, rawDep := range rawDeps {
-               gid := strings.Split(string(rawDep[3]), ".")
+               gid := strings.Split(string(rawDep[reFind.SubexpIndex("gid")]), 
".")
                dep := &Dependency{
                        GroupID:    gid,
-                       ArtifactID: string(rawDep[4]),
-                       Packaging:  string(rawDep[5]),
-                       Version:    string(rawDep[6]),
-                       Scope:      string(rawDep[7]),
+                       ArtifactID: string(rawDep[reFind.SubexpIndex("aid")]),
+                       Packaging:  
string(rawDep[reFind.SubexpIndex("packaging")]),
+                       Version:    
string(rawDep[reFind.SubexpIndex("version")]),
+                       Scope:      string(rawDep[reFind.SubexpIndex("scope")]),
                }
 
                if _, have := unique[dep.Path()]; have {
@@ -391,7 +394,7 @@ func (s *State) String() string {
                m = append(m, "failed to resolve license content from manifest 
file found in jar")
        }
 
-       return strings.Join(m, "|")
+       return strings.Join(m, " | ")
 }
 
 type Dependency struct {
@@ -457,12 +460,12 @@ type PomFile struct {
 }
 
 // AllLicenses return all licenses found in pom.xml file
-func (pom *PomFile) AllLicenses() string {
+func (pom *PomFile) AllLicenses(config *ConfigDeps) string {
        licenses := []string{}
        for _, l := range pom.Licenses {
-               licenses = append(licenses, l.Item())
+               licenses = append(licenses, l.Item(config))
        }
-       return strings.Join(licenses, ", ")
+       return strings.Join(licenses, " and ")
 }
 
 // Raw return raw data
@@ -481,14 +484,23 @@ type XMLLicense struct {
        Comments     string `xml:"comments,omitempty"`
 }
 
-func (l *XMLLicense) Item() string {
-       return GetLicenseFromURL(l.URL)
+func (l *XMLLicense) Item(config *ConfigDeps) string {
+       if l.URL != "" {
+               return GetLicenseFromURL(l.URL, config)
+       }
+       if l.Name != "" {
+               return l.Name
+       }
+       return l.URL
 }
 
 func (l *XMLLicense) Raw() string {
        return fmt.Sprintf(`License: {Name: %s, URL: %s, Distribution: %s, 
Comments: %s, }`, l.Name, l.URL, l.Distribution, l.Comments)
 }
 
-func GetLicenseFromURL(url string) string {
+func GetLicenseFromURL(url string, config *ConfigDeps) string {
+       if l, err := license.Identify(url, config.Threshold); err == nil {
+               return l
+       }
        return url
 }
diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go
index b48eb90..8769d79 100644
--- a/pkg/deps/resolve.go
+++ b/pkg/deps/resolve.go
@@ -30,6 +30,7 @@ var Resolvers = []Resolver{
        new(GoModResolver),
        new(NpmResolver),
        new(MavenPomResolver),
+       new(JarResolver),
 }
 
 func Resolve(config *ConfigDeps, report *Report) error {
diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go
index 5c42cf3..05f3192 100644
--- a/pkg/license/identifier.go
+++ b/pkg/license/identifier.go
@@ -24,6 +24,7 @@ import (
        "sync"
 
        "github.com/google/licensecheck"
+       "gopkg.in/yaml.v3"
 
        "github.com/apache/skywalking-eyes/assets"
        "github.com/apache/skywalking-eyes/internal/logger"
@@ -40,8 +41,24 @@ var (
 // It will be initialized once.
 func scanner() *licensecheck.Scanner {
        scannerOnce.Do(func() {
+               licenses := licensecheck.BuiltinLicenses()
+               if bs, err := assets.Asset("urls.yaml"); err == nil {
+                       licenseURLs := make(map[string][]string)
+                       if err := yaml.Unmarshal(bs, &licenseURLs); err == nil {
+                               logger.Log.Debug("license URLs:", licenseURLs)
+                               for id, urls := range licenseURLs {
+                                       for _, url := range urls {
+                                               licenses = append(licenses, 
licensecheck.License{
+                                                       ID:   id,
+                                                       URL:  url,
+                                                       Type: 
licensecheck.Unknown,
+                                               })
+                                       }
+                               }
+                       }
+               }
                var err error
-               _scanner, err = 
licensecheck.NewScanner(licensecheck.BuiltinLicenses())
+               _scanner, err = licensecheck.NewScanner(licenses)
                if err != nil {
                        logger.Log.Fatalf("licensecheck.NewScanner: %v", err)
                }
@@ -57,10 +74,17 @@ func Identify(content string, threshold int) (string, 
error) {
                return "", fmt.Errorf("cannot identify the license, coverage: 
%.1f%%", coverage.Percent)
        }
 
+       seen := make(map[string]bool)
+
        var sb strings.Builder
        sb.WriteString(coverage.Match[0].ID)
+       seen[coverage.Match[0].ID] = true
 
        for i := 1; i < len(coverage.Match); i++ {
+               if seen[coverage.Match[i].ID] {
+                       continue
+               }
+               seen[coverage.Match[i].ID] = true
                sb.WriteString(" and ")
                sb.WriteString(coverage.Match[i].ID)
        }
diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go
index ae5b666..7ac706f 100644
--- a/pkg/license/identifier_test.go
+++ b/pkg/license/identifier_test.go
@@ -323,6 +323,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
SUCH DAMAGE.
 `,
                        want: "BSD-3-Clause",
                },
+               {
+                       name:    "URL 
http://www.apache.org/licenses/LICENSE-2.0",
+                       content: "http://www.apache.org/licenses/LICENSE-2.0",
+                       want:    "Apache-2.0",
+               },
+               {
+                       name:    "URL 
https://opensource.org/licenses/Apache-2.0",
+                       content: "https://opensource.org/licenses/Apache-2.0",
+                       want:    "Apache-2.0",
+               },
+               {
+                       name:    "URL 
http://www.apache.org/licenses/LICENSE-2.0.txt",
+                       content: 
"http://www.apache.org/licenses/LICENSE-2.0.txt",
+                       want:    "Apache-2.0",
+               },
+               {
+                       name:    "URL 
https://www.bouncycastle.org/licence.html",
+                       content: "https://www.bouncycastle.org/licence.html",
+                       want:    "MIT",
+               },
+               {
+                       name:    "URL 
https://www.bouncycastle.org/licence.html",
+                       content: 
"http://www.gnu.org/software/classpath/license.html, 
http://opensource.org/licenses/MIT",
+                       want:    "GPL-2.0-with-classpath-exception and MIT",
+               },
+               {
+                       name:    "URL",
+                       content: "http://www.mozilla.org/MPL/MPL-1.1.html, 
http://www.gnu.org/licenses/lgpl-2.1.html",
+                       want:    "MPL-1.1 and LGPL-2.1",
+               },
        }
        for _, tt := range tests {
                t.Run(tt.name, func(t *testing.T) {

Reply via email to