This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch refactor
in repository https://gitbox.apache.org/repos/asf/skywalking-eyes.git

commit e194b68d89bea1b7722833e270ad90e35980cd0c
Author: kezhenxu94 <[email protected]>
AuthorDate: Wed Dec 23 13:08:41 2020 +0800

    Refactor the normalizers to increase accuracy and minimise the use of 
pattern
---
 .golangci.yml                          |  37 ++++-
 .licenserc.yaml                        |   4 +-
 license-eye/commands/header/check.go   |   3 +-
 license-eye/commands/header/fix.go     |   3 +-
 license-eye/internal/logger/log.go     |   2 +-
 license-eye/pkg/header/check.go        |  40 ++---
 license-eye/pkg/header/check_test.go   |   9 +-
 license-eye/pkg/header/config.go       |  50 ++-----
 license-eye/pkg/header/fix.go          |   7 +-
 license-eye/pkg/license/norm.go        | 171 ++++++++++++++++++++++
 license-eye/pkg/license/norm_test.go   | 260 +++++++++++++++++++++++++++++++++
 license-eye/pkg/{header => }/result.go |   2 +-
 12 files changed, 506 insertions(+), 82 deletions(-)

diff --git a/.golangci.yml b/.golangci.yml
index f4a7795..f4a6fee 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -16,6 +16,9 @@
 # under the License.
 # 
 
+run:
+  tests: false
+
 linters-settings:
   govet:
     check-shadowing: true
@@ -37,6 +40,38 @@ linters-settings:
       fmt: "logging is allowed only by logutils.Log"
   misspell:
     locale: US
+    ignore-words:
+      - analogue
+      - analyse
+      - artefact
+      - authorised
+      - calibre
+      - cancelled
+      - catalogue
+      - categorise
+      - centre
+      - emphasised
+      - favour
+      - favourite
+      - fulfil
+      - fulfilment
+      - initialise
+      - labelling
+      - labour
+      - licence
+      - maximise
+      - modelled
+      - modelling
+      - offence
+      - optimise
+      - organisation
+      - organise
+      - practise
+      - programme
+      - realise
+      - recognise
+      - signalling
+      - utilisation
   lll:
     line-length: 150
   goimports:
@@ -51,7 +86,7 @@ linters-settings:
     disabled-checks:
       - ifElseChain
   funlen:
-    lines: 150
+    lines: 100
     statements: 50
   whitespace:
     multi-if: false
diff --git a/.licenserc.yaml b/.licenserc.yaml
index e4494c7..819f2b1 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -38,10 +38,10 @@ header: # `header` section is configurations for source 
codes license header.
   # `pattern` is optional regexp if all the file headers are the same as 
`license` (linebreaks doesn't matter);
   # In the `pattern`, all punctuations should be removed unless they are part 
of the regex;
   pattern: |
-    Licensed to( the)? Apache Software Foundation \(ASF\) under one or more 
contributor
+    Licensed to the Apache Software Foundation under one or more contributor
     license agreements. See the NOTICE file distributed with
     this work for additional information regarding copyright
-    ownership. (Apache Software Foundation \(ASF\)|The ASF) licenses this file 
to you under
+    ownership. The Apache Software Foundation licenses this file to you under
     the Apache License, Version 2.0 \(the "License"\); you may
     not use this file except in compliance with the License.
     You may obtain a copy of the License at
diff --git a/license-eye/commands/header/check.go 
b/license-eye/commands/header/check.go
index c349ace..0578286 100644
--- a/license-eye/commands/header/check.go
+++ b/license-eye/commands/header/check.go
@@ -19,6 +19,7 @@ package header
 
 import (
        "github.com/apache/skywalking-eyes/license-eye/internal/logger"
+       "github.com/apache/skywalking-eyes/license-eye/pkg"
        "github.com/apache/skywalking-eyes/license-eye/pkg/config"
        "github.com/apache/skywalking-eyes/license-eye/pkg/header"
 
@@ -31,7 +32,7 @@ var CheckCommand = &cobra.Command{
        Long:    "check command walks the specified paths recursively and 
checks if the specified files have the license header in the config file.",
        RunE: func(cmd *cobra.Command, args []string) error {
                var config config.Config
-               var result header.Result
+               var result pkg.Result
 
                if err := config.Parse(cfgFile); err != nil {
                        return err
diff --git a/license-eye/commands/header/fix.go 
b/license-eye/commands/header/fix.go
index b65069f..4fbba48 100644
--- a/license-eye/commands/header/fix.go
+++ b/license-eye/commands/header/fix.go
@@ -23,6 +23,7 @@ import (
        "strings"
 
        "github.com/apache/skywalking-eyes/license-eye/internal/logger"
+       "github.com/apache/skywalking-eyes/license-eye/pkg"
        "github.com/apache/skywalking-eyes/license-eye/pkg/config"
        "github.com/apache/skywalking-eyes/license-eye/pkg/header"
        "github.com/spf13/cobra"
@@ -34,7 +35,7 @@ var FixCommand = &cobra.Command{
        Long:    "fix command walks the specified paths recursively and fix the 
license header if the specified files don't have the license header.",
        RunE: func(cmd *cobra.Command, args []string) error {
                var config config.Config
-               var result header.Result
+               var result pkg.Result
 
                if err := config.Parse(cfgFile); err != nil {
                        return err
diff --git a/license-eye/internal/logger/log.go 
b/license-eye/internal/logger/log.go
index 1c641e8..7fc102e 100644
--- a/license-eye/internal/logger/log.go
+++ b/license-eye/internal/logger/log.go
@@ -29,7 +29,7 @@ func init() {
        if Log == nil {
                Log = logrus.New()
        }
-       Log.Level = logrus.InfoLevel
+       Log.Level = logrus.DebugLevel
        Log.SetOutput(os.Stdout)
        Log.SetFormatter(&logrus.TextFormatter{
                DisableTimestamp:       true,
diff --git a/license-eye/pkg/header/check.go b/license-eye/pkg/header/check.go
index 8053cd3..c32279b 100644
--- a/license-eye/pkg/header/check.go
+++ b/license-eye/pkg/header/check.go
@@ -18,7 +18,6 @@
 package header
 
 import (
-       "bufio"
        "io/ioutil"
        "net/http"
        "os"
@@ -27,20 +26,20 @@ import (
        "strings"
 
        "github.com/apache/skywalking-eyes/license-eye/internal/logger"
+       "github.com/apache/skywalking-eyes/license-eye/pkg"
+       lcs "github.com/apache/skywalking-eyes/license-eye/pkg/license"
 
        "github.com/bmatcuk/doublestar/v2"
 )
 
-// TODO: also trim stop words
 var (
        // LicenseLocationThreshold specifies the index threshold where the 
license header can be located,
        // after all, a "header" cannot be TOO far from the file start.
        LicenseLocationThreshold = 80
-       Punctuations             = 
regexp.MustCompile("[\\[\\]/*:;\\s#\\-!~'\"(){}?]+")
 )
 
 // Check checks the license headers of the specified paths/globs.
-func Check(config *ConfigHeader, result *Result) error {
+func Check(config *ConfigHeader, result *pkg.Result) error {
        for _, pattern := range config.Paths {
                if err := checkPattern(pattern, result, config); err != nil {
                        return err
@@ -52,7 +51,7 @@ func Check(config *ConfigHeader, result *Result) error {
 
 var seen = make(map[string]bool)
 
-func checkPattern(pattern string, result *Result, config *ConfigHeader) error {
+func checkPattern(pattern string, result *pkg.Result, config *ConfigHeader) 
error {
        paths, err := doublestar.Glob(pattern)
 
        if err != nil {
@@ -73,7 +72,7 @@ func checkPattern(pattern string, result *Result, config 
*ConfigHeader) error {
        return nil
 }
 
-func checkPath(path string, result *Result, config *ConfigHeader) error {
+func checkPath(path string, result *pkg.Result, config *ConfigHeader) error {
        defer func() { seen[path] = true }()
 
        if yes, err := config.ShouldIgnore(path); yes || seen[path] || err != 
nil {
@@ -106,7 +105,7 @@ func checkPath(path string, result *Result, config 
*ConfigHeader) error {
 }
 
 // CheckFile checks whether or not the file contains the configured license 
header.
-func CheckFile(file string, config *ConfigHeader, result *Result) error {
+func CheckFile(file string, config *ConfigHeader, result *pkg.Result) error {
        if yes, err := config.ShouldIgnore(file); yes || err != nil {
                if !seen[file] {
                        result.Ignore(file)
@@ -116,14 +115,6 @@ func CheckFile(file string, config *ConfigHeader, result 
*Result) error {
 
        logger.Log.Debugln("Checking file:", file)
 
-       reader, err := os.Open(file)
-
-       if err != nil {
-               return err
-       }
-
-       var lines []string
-
        bs, err := ioutil.ReadFile(file)
        if err != nil {
                return err
@@ -133,24 +124,13 @@ func CheckFile(file string, config *ConfigHeader, result 
*Result) error {
                return nil
        }
 
-       scanner := bufio.NewScanner(reader)
-       for scanner.Scan() {
-               line := 
strings.ToLower(Punctuations.ReplaceAllString(scanner.Text(), " "))
-               if len(line) > 0 {
-                       lines = append(lines, line)
-               }
-       }
+       content := lcs.NormalizeHeader(string(bs))
+       expected, pattern := config.NormalizedLicense(), 
config.NormalizedPattern()
 
-       content := Punctuations.ReplaceAllString(strings.Join(lines, " "), " ")
-       license, pattern := config.NormalizedLicense(), 
config.NormalizedPattern()
-
-       if satisfy(content, license, pattern) {
+       if satisfy(content, expected, pattern) {
                result.Succeed(file)
        } else {
                logger.Log.Debugln("Content is:", content)
-               if pattern != nil {
-                       logger.Log.Debugln("Pattern is:", pattern)
-               }
 
                result.Fail(file)
        }
@@ -159,7 +139,7 @@ func CheckFile(file string, config *ConfigHeader, result 
*Result) error {
 }
 
 func satisfy(content, license string, pattern *regexp.Regexp) bool {
-       if index := strings.Index(content, license); index >= 0 {
+       if index := strings.Index(content, license); license != "" && index >= 
0 {
                return index < LicenseLocationThreshold
        }
 
diff --git a/license-eye/pkg/header/check_test.go 
b/license-eye/pkg/header/check_test.go
index 2cc2de7..ca8e1bb 100644
--- a/license-eye/pkg/header/check_test.go
+++ b/license-eye/pkg/header/check_test.go
@@ -24,6 +24,7 @@ import (
        "strings"
        "testing"
 
+       "github.com/apache/skywalking-eyes/license-eye/pkg"
        "gopkg.in/yaml.v3"
 )
 
@@ -48,7 +49,7 @@ func TestCheckFile(t *testing.T) {
        type args struct {
                name       string
                file       string
-               result     *Result
+               result     *pkg.Result
                wantErr    bool
                hasFailure bool
        }
@@ -64,7 +65,7 @@ func TestCheckFile(t *testing.T) {
                        cases = append(cases, args{
                                name:       file,
                                file:       file,
-                               result:     &Result{},
+                               result:     &pkg.Result{},
                                wantErr:    false,
                                hasFailure: false,
                        })
@@ -99,7 +100,7 @@ func TestCheckFileFailure(t *testing.T) {
        type args struct {
                name       string
                file       string
-               result     *Result
+               result     *pkg.Result
                wantErr    bool
                hasFailure bool
        }
@@ -115,7 +116,7 @@ func TestCheckFileFailure(t *testing.T) {
                        cases = append(cases, args{
                                name:       file,
                                file:       file,
-                               result:     &Result{},
+                               result:     &pkg.Result{},
                                wantErr:    false,
                                hasFailure: true,
                        })
diff --git a/license-eye/pkg/header/config.go b/license-eye/pkg/header/config.go
index 8911cfc..d303dd4 100644
--- a/license-eye/pkg/header/config.go
+++ b/license-eye/pkg/header/config.go
@@ -19,15 +19,14 @@ package header
 
 import (
        "bufio"
-       "io/ioutil"
        "os"
        "regexp"
        "strings"
 
        "github.com/apache/skywalking-eyes/license-eye/internal/logger"
+       "github.com/apache/skywalking-eyes/license-eye/pkg/license"
 
        "github.com/bmatcuk/doublestar/v2"
-       "gopkg.in/yaml.v3"
 )
 
 type ConfigHeader struct {
@@ -40,47 +39,19 @@ type ConfigHeader struct {
 // NormalizedLicense returns the normalized string of the license content,
 // "normalized" means the linebreaks and Punctuations are all trimmed.
 func (config *ConfigHeader) NormalizedLicense() string {
-       var lines []string
-       for _, line := range strings.Split(config.License, "\n") {
-               if len(line) > 0 {
-                       lines = append(lines, 
Punctuations.ReplaceAllString(line, " "))
-               }
-       }
-       return 
strings.ToLower(regexp.MustCompile("(?m)[\\s\"']+").ReplaceAllString(strings.Join(lines,
 " "), " "))
+       return license.Normalize(config.License)
 }
 
 func (config *ConfigHeader) NormalizedPattern() *regexp.Regexp {
-       if config.Pattern == "" || strings.TrimSpace(config.Pattern) == "" {
-               return nil
-       }
+       pattern := config.Pattern
 
-       var lines []string
-       for _, line := range strings.Split(config.Pattern, "\n") {
-               if len(line) > 0 {
-                       lines = append(lines, line)
-               }
-       }
-       content := 
regexp.MustCompile("(?m)[\\s\"':;/\\-]+").ReplaceAllString(strings.Join(lines, 
" "), " ")
-       return regexp.MustCompile("(?i).*" + content + ".*")
-}
-
-// Parse reads and parses the header check configurations in config file.
-func (config *ConfigHeader) Parse(file string) error {
-       logger.Log.Infoln("Loading configuration from file:", file)
-
-       if bytes, err := ioutil.ReadFile(file); err != nil {
-               return err
-       } else if err := yaml.Unmarshal(bytes, config); err != nil {
-               return err
+       if pattern == "" || strings.TrimSpace(pattern) == "" {
+               return nil
        }
 
-       logger.Log.Debugln("License header is:", config.NormalizedLicense())
+       pattern = license.NormalizePattern(pattern)
 
-       if len(config.Paths) == 0 {
-               config.Paths = []string{"**"}
-       }
-
-       return nil
+       return regexp.MustCompile("(?i).*" + pattern + ".*")
 }
 
 func (config *ConfigHeader) ShouldIgnore(path string) (bool, error) {
@@ -103,8 +74,6 @@ func (config *ConfigHeader) ShouldIgnore(path string) (bool, 
error) {
 }
 
 func (config *ConfigHeader) Finalize() error {
-       logger.Log.Debugln("License header is:", config.NormalizedLicense())
-
        if len(config.Paths) == 0 {
                config.Paths = []string{"**"}
        }
@@ -124,5 +93,10 @@ func (config *ConfigHeader) Finalize() error {
                }
        }
 
+       logger.Log.Debugln("License header is:", config.NormalizedLicense())
+       if p := config.NormalizedPattern(); p != nil {
+               logger.Log.Debugln("Pattern is:", p)
+       }
+
        return nil
 }
diff --git a/license-eye/pkg/header/fix.go b/license-eye/pkg/header/fix.go
index 40acde7..203de06 100644
--- a/license-eye/pkg/header/fix.go
+++ b/license-eye/pkg/header/fix.go
@@ -26,12 +26,13 @@ import (
        "strings"
 
        "github.com/apache/skywalking-eyes/license-eye/internal/logger"
+       "github.com/apache/skywalking-eyes/license-eye/pkg"
        "github.com/apache/skywalking-eyes/license-eye/pkg/comments"
 )
 
 // Fix adds the configured license header to the given file.
-func Fix(file string, config *ConfigHeader, result *Result) error {
-       var r Result
+func Fix(file string, config *ConfigHeader, result *pkg.Result) error {
+       var r pkg.Result
        if err := CheckFile(file, config, &r); err != nil || !r.HasFailure() {
                logger.Log.Warnln("Try to fix a valid file, do nothing:", file)
                return err
@@ -50,7 +51,7 @@ func Fix(file string, config *ConfigHeader, result *Result) 
error {
        return nil
 }
 
-func InsertComment(file string, style *comments.CommentStyle, config 
*ConfigHeader, result *Result) error {
+func InsertComment(file string, style *comments.CommentStyle, config 
*ConfigHeader, result *pkg.Result) error {
        stat, err := os.Stat(file)
        if err != nil {
                return err
diff --git a/license-eye/pkg/license/norm.go b/license-eye/pkg/license/norm.go
new file mode 100644
index 0000000..771f2b2
--- /dev/null
+++ b/license-eye/pkg/license/norm.go
@@ -0,0 +1,171 @@
+//
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package license
+
+import (
+       "regexp"
+       "strings"
+)
+
+type Normalizer func(string) string
+
+var (
+       // normalizers is a list of Normalizers that can be applied to the 
license text without changing the license's
+       // meaning, according to the matching guidelines at 
https://spdx.dev/license-list/matching-guidelines.
+       // The order matters.
+       normalizers = []Normalizer{
+               OneLineNormalizer,
+               FlattenSpaceNormalizer,
+               SubstantiveTextsNormalizer,
+               strings.ToLower,
+               strings.TrimSpace,
+       }
+
+       // 6. Code Comment Indicators 
(https://spdx.dev/license-list/matching-guidelines).
+       commentIndicators = []*regexp.Regexp{
+               regexp.MustCompile(`(?m)^\s*#+`),    // #
+               regexp.MustCompile(`(?m)^\s*//+`),   // //
+               regexp.MustCompile(`(?m)^\s*"""+`),  // """
+               regexp.MustCompile(`(?m)^\s*\(\*+`), // (*
+
+               regexp.MustCompile(`(?m)^\s*/\*+`), // /*
+               regexp.MustCompile(`(?m)^\s*\*+/`), //  */
+               regexp.MustCompile(`(?m)^\s*\*+`),  //  *
+
+               regexp.MustCompile(`(?m)^\s*<!--+`), // <!--
+               regexp.MustCompile(`(?m)^\s*--+>`),  // -->
+               regexp.MustCompile(`(?m)^\s*--+`),   // --
+               regexp.MustCompile(`(?m)^\s*~+`),    //   ~
+
+               regexp.MustCompile(`(?m)^\s*{-+`), // {-
+               regexp.MustCompile(`(?m)^\s*-}+`), // -}
+
+               regexp.MustCompile(`(?m)^\s*::`),   // ::
+               regexp.MustCompile(`(?m)^\s*@REM`), // @REM
+       }
+
+       flattenSpace = regexp.MustCompile(`\s+`)
+
+       substitutableTexts = []struct {
+               regex       *regexp.Regexp
+               replacement string
+       }{
+               {regexp.MustCompile(`(?i)\backnowledgement\b`), 
"acknowledgment"},
+               {regexp.MustCompile(`(?i)\banalog\b`), "analogue"},
+               {regexp.MustCompile(`(?i)\banalyze\b`), "analyse"},
+               {regexp.MustCompile(`(?i)\bartifact\b`), "artefact"},
+               {regexp.MustCompile(`(?i)\bauthorization\b`), "authorisation"},
+               {regexp.MustCompile(`(?i)\bauthorized\b`), "authorised"},
+               {regexp.MustCompile(`(?i)\bcaliber\b`), "calibre"},
+               {regexp.MustCompile(`(?i)\bcanceled\b`), "cancelled"},
+               {regexp.MustCompile(`(?i)\bcapitalizations\b`), 
"capitalisations"},
+               {regexp.MustCompile(`(?i)\bcatalog\b`), "catalogue"},
+               {regexp.MustCompile(`(?i)\bcategorize\b`), "categorise"},
+               {regexp.MustCompile(`(?i)\bcenter\b`), "centre"},
+               {regexp.MustCompile(`(?i)\bcopyright holder\b`), "copyright 
owner"},
+               {regexp.MustCompile(`(?i)\bemphasized\b`), "emphasised"},
+               {regexp.MustCompile(`(?i)\bfavor\b`), "favour"},
+               {regexp.MustCompile(`(?i)\bfavorite\b`), "favourite"},
+               {regexp.MustCompile(`(?i)\bfulfill\b`), "fulfil"},
+               {regexp.MustCompile(`(?i)\bfulfillment\b`), "fulfilment"},
+               {regexp.MustCompile(`(?i)\binitialize\b`), "initialise"},
+               {regexp.MustCompile(`(?i)\bjudgement\b`), "judgment"},
+               {regexp.MustCompile(`(?i)\blabeling\b`), "labelling"},
+               {regexp.MustCompile(`(?i)\blabor\b`), "labour"},
+               {regexp.MustCompile(`(?i)\blicense\b`), "licence"},
+               {regexp.MustCompile(`(?i)\bmaximize\b`), "maximise"},
+               {regexp.MustCompile(`(?i)\bmodeled\b`), "modelled"},
+               {regexp.MustCompile(`(?i)\bmodeling\b`), "modelling"},
+               {regexp.MustCompile(`(?i)\bnoncommercial\b`), "non-commercial"},
+               {regexp.MustCompile(`(?i)\boffense\b`), "offence"},
+               {regexp.MustCompile(`(?i)\boptimize\b`), "optimise"},
+               {regexp.MustCompile(`(?i)\borganization\b`), "organisation"},
+               {regexp.MustCompile(`(?i)\borganize\b`), "organise"},
+               {regexp.MustCompile(`(?i)\bpercent\b`), "per cent"},
+               {regexp.MustCompile(`(?i)\bpractice\b`), "practise"},
+               {regexp.MustCompile(`(?i)\bprogram\b`), "programme"},
+               {regexp.MustCompile(`(?i)\brealize\b`), "realise"},
+               {regexp.MustCompile(`(?i)\brecognize\b`), "recognise"},
+               {regexp.MustCompile(`(?i)\bsignaling\b`), "signalling"},
+               {regexp.MustCompile(`(?i)\bsublicense\b`), "sub-license"},
+               {regexp.MustCompile(`(?i)\bsub-license\b`), "sub license"},
+               {regexp.MustCompile(`(?i)\bsublicense\b`), "sub license"},
+               {regexp.MustCompile(`(?i)\butilization\b`), "utilisation"},
+               {regexp.MustCompile(`(?i)\bwhile\b`), "whilst"},
+               {regexp.MustCompile(`(?i)\bwilfull\b`), "wilful"},
+
+               {regexp.MustCompile(`©`), "Copyright "},
+               {regexp.MustCompile(`\(c\)`), "Copyright "},
+               {regexp.MustCompile(`\bhttps://`), "http://"},
+
+               {regexp.MustCompile(`(?i)\b(the )?Apache Software Foundation( 
\(ASF\))?`), "the ASF"},
+       }
+)
+
+// NormalizePattern applies a chain of Normalizers to the license pattern to 
make it cleaner for identification.
+func NormalizePattern(pattern string) string {
+       for _, normalize := range normalizers {
+               pattern = normalize(pattern)
+       }
+       return pattern
+}
+
+// NormalizeHeader applies a chain of Normalizers to the file header to make 
it cleaner for identification.
+func NormalizeHeader(header string) string {
+       ns := append([]Normalizer{CommentIndicatorNormalizer}, normalizers...)
+       for _, normalize := range ns {
+               header = normalize(header)
+       }
+       return header
+}
+
+// Normalize applies a chain of Normalizers to the license text to make it 
cleaner for identification.
+func Normalize(license string) string {
+       for _, normalize := range normalizers {
+               license = normalize(license)
+       }
+       return license
+}
+
+// OneLineNormalizer simply removes all line breaks to flatten the license 
text into one line.
+func OneLineNormalizer(text string) string {
+       return regexp.MustCompile("[\n\r]+").ReplaceAllString(text, " ")
+}
+
+// SubstantiveTextsNormalizer normalizes the license text by substituting some 
words that
+// don't change the meaning of the license.
+func SubstantiveTextsNormalizer(text string) string {
+       for _, s := range substitutableTexts {
+               text = s.regex.ReplaceAllString(text, s.replacement)
+       }
+       return text
+}
+
+// CommentIndicatorNormalizer trims the leading characters of comments, such 
as /*, <!--, --, (*, etc.
+func CommentIndicatorNormalizer(text string) string {
+       for _, leadingChars := range commentIndicators {
+               text = leadingChars.ReplaceAllString(text, "")
+       }
+       return text
+}
+
+// FlattenSpaceNormalizer flattens continuous spaces into a single space.
+func FlattenSpaceNormalizer(text string) string {
+       return flattenSpace.ReplaceAllString(text, " ")
+}
diff --git a/license-eye/pkg/license/norm_test.go 
b/license-eye/pkg/license/norm_test.go
new file mode 100644
index 0000000..fd9fbc5
--- /dev/null
+++ b/license-eye/pkg/license/norm_test.go
@@ -0,0 +1,260 @@
+//
+// Licensed to Apache Software Foundation (ASF) under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Apache Software Foundation (ASF) licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+package license
+
+import "testing"
+
+type input struct {
+       name string
+       text string
+       want string
+}
+
+func TestCommentLeadingCharsNormalizer(t *testing.T) {
+       want := ` Licensed to the Apache Software Foundation (ASF) under one or 
more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+`
+       tests := []input{
+               {
+                       name: "Jave",
+                       want: want,
+                       text: `
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+`,
+               },
+               {
+                       name: "Python",
+                       want: want,
+                       text: `
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+`,
+               },
+               {
+                       name: "XML",
+                       want: want,
+                       text: `
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+`,
+               },
+               {
+                       name: "GoLang",
+                       want: want,
+                       text: `
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+`,
+               },
+               {
+                       name: "SQL",
+                       want: want,
+                       text: `
+-- Licensed to the Apache Software Foundation (ASF) under one or more
+-- contributor license agreements.  See the NOTICE file distributed with
+-- this work for additional information regarding copyright ownership.
+-- The ASF licenses this file to You under the Apache License, Version 2.0
+-- (the "License"); you may not use this file except in compliance with
+-- the License.  You may obtain a copy of the License at
+--
+--    http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+`,
+               },
+               {
+                       name: "BAT1",
+                       want: want,
+                       text: `
+:: Licensed to the Apache Software Foundation (ASF) under one or more
+:: contributor license agreements.  See the NOTICE file distributed with
+:: this work for additional information regarding copyright ownership.
+:: The ASF licenses this file to You under the Apache License, Version 2.0
+:: (the "License"); you may not use this file except in compliance with
+:: the License.  You may obtain a copy of the License at
+::
+::    http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+::
+`,
+               },
+               {
+                       name: "BAT2",
+                       want: want,
+                       text: `
+@REM Licensed to the Apache Software Foundation (ASF) under one or more
+@REM contributor license agreements.  See the NOTICE file distributed with
+@REM this work for additional information regarding copyright ownership.
+@REM The ASF licenses this file to You under the Apache License, Version 2.0
+@REM (the "License"); you may not use this file except in compliance with
+@REM the License.  You may obtain a copy of the License at
+@REM
+@REM    http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+@REM
+`,
+               },
+               {
+                       name: "PythonTripleQuotes",
+                       text: `
+"""
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+`,
+                       want: `
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+`,
+               },
+       }
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       if got := CommentIndicatorNormalizer(tt.text); got != tt.want {
+                               t.Errorf("%v %v", len(got), len(tt.want))
+                               t.Errorf("CommentIndicatorNormalizer() = %v, want %v", got, tt.want)
+                       }
+               })
+       }
+}
+
+func TestSubstantiveTextsNormalizer(t *testing.T) {
+       tests := []input{
+               {
+                       name: "ASF",
+                       text: "Licensed to the Apache Software Foundation (ASF) 
under one or more",
+                       want: "Licensed to the ASF under one or more",
+               },
+       }
+       for _, tt := range tests {
+               t.Run(tt.name, func(t *testing.T) {
+                       if got := SubstantiveTextsNormalizer(tt.text); got != 
tt.want {
+                               t.Errorf("SubstantiveTextsNormalizer() = %v, 
want %v", got, tt.want)
+                       }
+               })
+       }
+}
diff --git a/license-eye/pkg/header/result.go b/license-eye/pkg/result.go
similarity index 99%
rename from license-eye/pkg/header/result.go
rename to license-eye/pkg/result.go
index 068fe75..9333671 100644
--- a/license-eye/pkg/header/result.go
+++ b/license-eye/pkg/result.go
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 //
-package header
+package pkg
 
 import (
        "fmt"

Reply via email to