Philippe Ombredanne <[email protected]>:
> On Sat, Nov 21, 2015 at 11:11 PM, Eric S. Raymond <[email protected]> wrote:
> > I've enclosed a copy of a proof-of-concept program in Python that walks a
> > code tree replacing inline license headers with SPDX tags.  It can be
> > tested as a filter - feed a source file to its stdin, get back the
> > SPDXified version on stdout.
> 
> Eric:
> I think the code is not enclosed
> 
> > Can we cooperate on making this a production-quality tool?
> 
> I am game. FWIW, I maintain the scancode-toolkit that does scan and
> detects licenses in code and that could be useful.
> And it is also coded in Python ;)
> 
> -- 
> Cordially
> Philippe Ombredanne

Ooops.  Sorry.  Here it is...
-- 
                <a href="http://www.catb.org/~esr/">Eric S. Raymond</a>
#!/usr/bin/env python
"""
spdxify - replace inline licenses with SPDX tags

Usage: spdxify [-x exclude] [-V] [path-or-dir]...

Options:
   -x path       -- exclude specified path
   -V            -- print version and exit

Called without arguments it behaves as a filter.  Called with arguments,
each file is modified in place; each directory is recursed into and all
files beneath modified in place.

Files and subdirectories beginning with a dot "." are ignored.  So
are RCS/CVS masters (filenames ending with ,v) and files named LICENSE
or COPYING.
"""

import sys, os, string, getopt, time, re

# Version string of this tool.
version = "0.1"

# Reference license texts, keyed by SPDX license identifier.
#
# Each value is the canonical wording of a license notice without any
# comment decoration.  The opening backslash-newline in every literal keeps
# the text from starting with an empty line.  The text is consumed in two
# ways elsewhere in this file: its first 32 characters are used to spot the
# first line of a license span, and its individual lines are compared
# one-by-one (as substrings) against the input lines that follow.
recognition = {
"GPL-2.0+":"""\
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
""",
"MIT":"""\
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
""",
"BSD-2-Clause":"""\
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
""",
}

# Lookup table for spotting the first line of a license span: maps the
# opening 32 characters of each reference text to its SPDX identifier.
match = {
    license_text[:32]: spdx_id
    for spdx_id, license_text in recognition.items()
}

class Baton:
    """Ship progress indications to stderr.

    On construction the prompt followed by "..." is written.  Each call to
    twirl() advances a spinner (or prints a caller-supplied character) when
    stderr is a terminal, and end() prints the elapsed time and a closing
    message.

    Attributes:
        stream -- output stream (sys.stderr at construction time); twirl()
                  and end() do nothing if it has been set to None.
        count  -- number of twirl() calls so far.
        endmsg -- default message used by end().
        time   -- construction timestamp, used to report elapsed seconds.
    """
    def __init__(self, prompt, endmsg=None):
        self.stream = sys.stderr
        self.stream.write(prompt + "...")
        if self._on_tty():
            self.stream.write(" \b")
        self.stream.flush()
        self.count = 0
        self.endmsg = endmsg
        self.time = time.time()

    def _on_tty(self):
        "Tell whether the stream is a terminal, tolerating streams without one."
        # Ask the stream itself rather than os.isatty(stream.fileno()):
        # in-memory or captured replacements for stderr have no usable
        # file descriptor and would make fileno() raise.
        isatty = getattr(self.stream, "isatty", None)
        return bool(isatty is not None and isatty())

    def twirl(self, ch=None):
        "Advance the spinner, or emit ch instead when one is given."
        if self.stream is None:
            return
        if self._on_tty():
            if ch:
                self.stream.write(ch)
            else:
                # Draw the next spinner frame, then back up over it so the
                # following frame overwrites it.
                self.stream.write("-/|\\"[self.count % 4])
                self.stream.write("\b")
            self.stream.flush()
        # The call is counted even when nothing is drawn.
        self.count = self.count + 1

    def end(self, msg=None):
        "Write the elapsed time and msg (default: the endmsg given at creation)."
        if msg is None:
            msg = self.endmsg
        if self.stream:
            elapsed = time.time() - self.time
            self.stream.write("...(%2.2f sec) %s.\n" % (elapsed, msg))

# Module-wide progress indicator.  It stays None unless the main section
# creates a Baton, which it does only when not running in verbose mode.
baton = None

def forester(roots, excludes):
    """Expand directories in a path list into files.

    roots    -- iterable of paths.  A regular file is passed through
                unfiltered; a directory is walked recursively.
    excludes -- collection of file paths to leave out; compared for
                equality against each path as built from the walk.

    Returns the list of selected file paths.  Within a walked directory,
    dotfiles, everything beneath dot-directories, RCS/CVS masters (names
    ending in ",v") and files whose names end in COPYING or LICENSE are
    skipped.  A root that is neither a file nor a directory is reported
    on stderr and otherwise ignored.
    """
    sublist = []
    for root in roots:
        if os.path.isfile(root):
            sublist.append(root)
        elif os.path.isdir(root):
            for dirpath, dirs, files in os.walk(root):
                # Prune dot-directories (repository metadata and the like)
                # in place so os.walk never descends into them.  Judging
                # each name on its own, rather than looking for "/." in the
                # whole path, keeps a root such as ~/.local/src/project
                # from having every file beneath it ignored.
                dirs[:] = [d for d in dirs if not d.startswith(".")]
                for name in files:
                    path = os.path.join(dirpath, name)
                    if path in excludes:
                        continue
                    # Ignore dotfiles and RCS/CVS masters.  Finally, as a
                    # special case, ignore file names conventionally used
                    # for entire licenses.
                    if name.startswith(".") \
                           or name.endswith((",v", "COPYING", "LICENSE")):
                        continue
                    sublist.append(path)
        else:
            sys.stderr.write("spdxify: unexpected node type at %s\n" % root)
    return sublist

# Try to match comment and comment box characters: the (possibly empty)
# run of non-letter characters at the very start of a string.
border = re.compile("^[^A-Za-z]*")

def spdx_filter(rfp, wfp):
    """Report on or transform a single file.

    Copies rfp to wfp line by line.  When a line contains the opening text
    of a known license (a key of `match`), the following lines are compared
    against the rest of that license's reference text; a fully matched span
    is replaced by one "SPDX-License-Identifier: <id>" line carrying the
    same leading comment characters as the span's first line.

    rfp -- readable text stream.  Its `name` attribute, when present, is
           used in diagnostics.
    wfp -- writable text stream receiving the result.

    Returns True if a license span was replaced, False otherwise.  Nothing
    is written (and False is returned) when the first line contains
    non-printable characters.  Diagnostics go to stderr.
    """
    # In-memory streams carry no name; fall back to a placeholder rather
    # than failing while trying to report a problem.
    source = getattr(rfp, "name", "<input>")

    def write_tag(span, spdx):
        # Use all non-alphabetic characters leading the span as a comment
        # leader, then discard the span in favour of the SPDX tag.
        leader = border.match(span).group(0)
        wfp.write(leader + "SPDX-License-Identifier: %s\n" % spdx)

    # Ignore files with non-printable characters in the first line
    text = rfp.readline()
    if not all(c in string.printable for c in text):
        return False
    wfp.write(text)
    altered = False
    state = "init"
    candidate = []      # reference lines of the license still to be matched
    span = ""           # input text consumed so far for the current license
    spdx = None
    ln = 1              # the first line has already been read above
    for line in rfp:
        ln += 1
        if state == "init":
            for leader in match:
                if leader in line:
                    span = line
                    state = "in_license"
                    spdx = match[leader]
                    candidate = recognition[spdx].strip().split('\n')
                    candidate.pop(0)
                    break
            else:
                # No license starts here: pass the line through.
                wfp.write(line)
            if "WARRANTY" in line:
                sys.stderr.write("\"%s\", line %d: probable unrecognized license.\n"
                                 % (source, ln))
        elif not candidate:
            # We've consumed the license copy with matching lines.  We
            # actually hit this on the line *after* the license inclusion,
            # so write that line out after the tag.
            write_tag(span, spdx)
            wfp.write(line)
            altered = True
            state = "init"
        elif candidate[0] in line:
            # Still matching
            span += line
            candidate.pop(0)
        else:
            span += line
            # Unexpected text while matching license
            sys.stderr.write("\"%s\", line %d: unexpected line beginning '%s' for %s.\n"
                             % (source, ln, line[:20], spdx))
            # Recover by restoring the span into the text
            wfp.write(span)
            state = "init"
    # The input may end while a license is still being tracked; without
    # this step the pending span would silently vanish from the output.
    if state == "in_license":
        if candidate:
            sys.stderr.write("\"%s\", line %d: unexpected end of file within %s.\n"
                             % (source, ln, spdx))
            wfp.write(span)
        else:
            write_tag(span, spdx)
            altered = True
    return altered

if __name__ == "__main__":
    # Command-line driver.  With no path arguments the program filters
    # stdin to stdout; otherwise every selected file is rewritten in place.
    try:
        # "h"/"help" must be declared for the help branch below to be
        # reachable, and "exclude=" needs the "=" to accept its argument.
        (options, arguments) = getopt.getopt(sys.argv[1:], "hx:vV",
                                             ["help", "exclude=",
                                              "verbose", "version"])
    except getopt.GetoptError as err:
        sys.stderr.write("spdxify: %s\n" % err)
        raise SystemExit(2)
    exclusions = []
    verbose = False
    for (switch, val) in options:
        if switch in ('-h', '--help'):
            sys.stderr.write(__doc__)
            sys.exit(0)
        elif switch in ('-v', '--verbose'):
            verbose = True
        elif switch in ('-V', '--version'):
            sys.stdout.write("spdxify %s\n" % version)
            raise SystemExit(0)
        elif switch in ('-x', '--exclude'):
            exclusions.append(val)

    if not arguments:
        spdx_filter(sys.stdin, sys.stdout)
    else:
        if not verbose:
            baton = Baton("spdxify:", "done")
        for path in forester(arguments, exclusions):
            if verbose:
                sys.stdout.write(path + "\n")
            elif baton:
                baton.twirl()
            # Filter into a scratch file next to the original; the process
            # id keeps concurrent runs from colliding.
            tagified = path + "." + str(os.getpid())
            try:
                # Close both handles before the scratch file is read back,
                # so its contents are guaranteed to be flushed to disk.
                with open(path, "r") as rfp:
                    with open(tagified, "w") as wfp:
                        altered = spdx_filter(rfp, wfp)
                if altered:
                    # Copy the contents back rather than renaming, so the
                    # original file is rewritten in place.
                    with open(tagified) as rfp:
                        with open(path, "w") as wfp:
                            wfp.write(rfp.read())
            finally:
                # Never leave the scratch file behind, even on error.
                if os.path.exists(tagified):
                    os.unlink(tagified)

        if baton:
            baton.end()

# end


_______________________________________________
Spdx-tech mailing list
[email protected]
https://lists.spdx.org/mailman/listinfo/spdx-tech

Reply via email to