Philippe Ombredanne <[email protected]>:
> On Sat, Nov 21, 2015 at 11:11 PM, Eric S. Raymond <[email protected]> wrote:
> > I've enclosed a copy of a proof-of-concept program in Python that walks a
> > code tree replacing inline license headers with SPDX tags. It can be
> > tested as a filter - feed a source file to its stdin, get back the
> > SPDXified version on stdout.
>
> Eric:
> I think the code is not enclosed
>
> > Can we cooperate on making this a production-quality tool?
>
> I am game. FWIW, I maintain the scancode-toolkit, which scans code and
> detects the licenses in it, and that could be useful.
> And it is also coded in Python ;)
>
> --
> Cordially
> Philippe Ombredanne
Ooops. Sorry. Here it is...
--
<a href="http://www.catb.org/~esr/">Eric S. Raymond</a>
#!/usr/bin/env python
"""
spdxify - replace inline licenses with SPDX tags
Usage: spdxify [-x exclude] [-v] [-V] [path-or-dir]...
Options:
-x path -- exclude specified path
-v -- list each file as it is processed instead of showing a progress twirl
-V -- print version and exit
Called without arguments it behaves as a filter. Called with arguments,
each file is modified in place; each directory is recursed into and all
files beneath modified in place.
Files and subdirectories beginning with a dot "." are ignored. So
are RCS/CVS masters (filenames ending with ,v) and files named LICENSE
or COPYING.
"""
import sys, os, string, getopt, time, re
version = "0.1"
# Canonical license notice texts, keyed by SPDX license identifier.
# spdx_filter() splits a notice into lines and expects to find each line
# after the first as a substring of consecutive lines of the file being
# scanned; the `match` table keys on the first 32 characters of each text.
# Every literal opens with a backslash-newline so that the notice text
# itself starts on the following line.
recognition = {
"GPL-2.0+":"""\
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
""",
"MIT":"""\
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
""",
"BSD-2-Clause":"""\
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
""",
}
# Lookup table from the first 32 characters of each license text to its
# SPDX identifier; a line containing one of these prefixes is treated as
# the candidate first line of a license span.
match = dict(
    (notice[:32], spdx_id) for spdx_id, notice in recognition.items()
)
class Baton:
    """Ship progress indications to stderr.

    On construction the prompt followed by "..." is written to sys.stderr.
    Each call to twirl() advances a spinner (or emits a caller-supplied
    character) when the stream is a terminal, and end() writes the elapsed
    time together with a closing message.
    """

    def __init__(self, prompt, endmsg=None):
        """Write *prompt* and start the clock.

        prompt -- text shown before the progress indicator.
        endmsg -- default message for end() when it is called without one.
        """
        self.stream = sys.stderr
        self.stream.write(prompt + "...")
        if self._on_tty():
            # Park the cursor one column past the prompt, where the
            # spinner character will be drawn.
            self.stream.write(" \b")
        self.stream.flush()
        self.count = 0
        self.endmsg = endmsg
        self.time = time.time()

    def _on_tty(self):
        "Return True if the output stream reports itself as a terminal."
        # Ask the stream itself rather than going through fileno():
        # replacement streams such as StringIO have no file descriptor.
        isatty = getattr(self.stream, "isatty", None)
        return bool(isatty is not None and isatty())

    def twirl(self, ch=None):
        """Advance the progress indicator by one step.

        ch -- if given and true, written as-is instead of the next spinner
              frame.  Nothing is written unless the stream is a terminal,
              but the step counter is always incremented.
        """
        if self.stream is None:
            return
        if self._on_tty():
            if ch:
                self.stream.write(ch)
            else:
                self.stream.write("-/|\\"[self.count % 4])
                # Back up so the next frame overwrites this one.
                self.stream.write("\b")
            self.stream.flush()
        self.count += 1

    def end(self, msg=None):
        """Write the elapsed time and a closing message.

        msg -- closing message; defaults to the endmsg given at construction.
        """
        if msg is None:
            msg = self.endmsg
        if self.stream is not None:
            elapsed = time.time() - self.time
            self.stream.write("...(%2.2f sec) %s.\n" % (elapsed, msg))
# Progress indicator for the command-line driver; None means that no
# progress display is active.
baton = None
def forester(roots, excludes):
    """Expand directories in a path list into files.

    roots    -- iterable of file or directory paths.
    excludes -- collection of file paths (as produced by joining the walked
                directory and the file name) to leave out of the result.

    Returns a list of file paths.  A root that is a regular file is passed
    through unchanged.  A root that is a directory is walked recursively,
    skipping:
      * files and subdirectories whose name begins with a dot (which
        covers repository metadata directories),
      * RCS and CVS masters (names ending in ",v"),
      * files named exactly COPYING or LICENSE, which conventionally hold
        an entire license rather than an inline notice,
      * any path listed in excludes.
    A root that is neither a file nor a directory is reported on stderr
    and otherwise ignored.
    """
    sublist = []
    for root in roots:
        if os.path.isfile(root):
            sublist.append(root)
        elif os.path.isdir(root):
            for dirpath, dirs, files in os.walk(root):
                # Prune hidden subdirectories in place so os.walk never
                # descends into them.  Testing names rather than searching
                # the joined path for "/." keeps this independent of the
                # path separator and of dots in the root path itself.
                dirs[:] = [d for d in dirs if not d.startswith(".")]
                for name in files:
                    if name.startswith(".") or name.endswith(",v"):
                        continue
                    if name in ("COPYING", "LICENSE"):
                        continue
                    path = os.path.join(dirpath, name)
                    if path in excludes:
                        continue
                    sublist.append(path)
        else:
            sys.stderr.write("spdxify: unexpected node type at %s\n" % root)
    return sublist
# Try to match comment and comment-box characters: the pattern captures the
# (possibly empty) run of non-alphabetic characters at the start of a string.
border = re.compile("^[^A-Za-z]*")
def _numbered_lines(first, rest):
    "Yield (line number, line) pairs for a stream whose first line was already read."
    if first:
        yield 1, first
    for pair in enumerate(rest, 2):
        yield pair


def spdx_filter(rfp, wfp):
    """Report on or transform a single file.

    Copies rfp to wfp line by line.  When a line contains the opening text
    of a license known to the `recognition` table and the lines that follow
    contain the rest of that license text in order, the whole span is
    replaced by a single "SPDX-License-Identifier:" line.  The tag line is
    prefixed with the non-alphabetic characters that led the first line of
    the span, so it stays inside the same comment syntax.

    rfp -- readable text stream; its `name` attribute, if present, is used
           in diagnostics.
    wfp -- writable text stream receiving the (possibly) transformed text.

    Returns True if at least one license span was replaced, else False.
    A stream whose first line contains non-printable characters is copied
    through unchanged and reported as unaltered.

    Diagnostics go to stderr: a line mentioning WARRANTY outside any
    recognized license, and a span that starts like a known license but
    then diverges or is cut off by end of input (such a span is copied
    through untouched).
    """
    source = getattr(rfp, "name", "<input>")
    first = rfp.readline()
    if not all(c in string.printable for c in first):
        # Probably not a text file.  Pass it through untouched so that
        # filter mode never swallows its input.
        wfp.write(first)
        for line in rfp:
            wfp.write(line)
        return False

    altered = False
    spdx = None      # identifier of the license being matched, or None
    expected = []    # lines of that license's reference text
    pos = 0          # index of the next reference line to look for
    span = []        # file lines consumed by the current match attempt
    ln = 0
    for ln, line in _numbered_lines(first, rfp):
        if spdx is None:
            # Not inside a license: look for the opening words of one.
            for leader, ident in match.items():
                if leader in line:
                    spdx = ident
                    span = [line]
                    expected = recognition[ident].strip().split("\n")
                    pos = 1
                    break
            else:
                wfp.write(line)
                if "WARRANTY" in line:
                    sys.stderr.write(
                        "\"%s\", line %d: probable unrecognized license.\n"
                        % (source, ln))
                continue
        elif expected[pos] in line:
            # Still matching.
            span.append(line)
            pos += 1
        else:
            # Unexpected text while matching a license.
            span.append(line)
            sys.stderr.write(
                "\"%s\", line %d: unexpected line beginning '%s' for %s.\n"
                % (source, ln, line[:20], spdx))
            # Recover by restoring the span into the text.
            wfp.write("".join(span))
            spdx = None
            continue

        if pos >= len(expected):
            # The whole reference text has been seen: discard the span and
            # emit the tag right away, so a license that ends the file is
            # handled too.  Use all non-alphabetic characters leading the
            # span as the comment leader.
            # NOTE: any text trailing the last matched line (for example a
            # comment closer) is discarded together with the span.
            leader_chars = border.match(span[0]).group(0)
            wfp.write(leader_chars + "SPDX-License-Identifier: %s\n" % spdx)
            altered = True
            spdx = None

    if spdx is not None:
        # Input ended in the middle of a candidate license; put the
        # partially matched lines back instead of dropping them.
        sys.stderr.write(
            "\"%s\", line %d: unexpected end of file in %s.\n"
            % (source, ln, spdx))
        wfp.write("".join(span))
    return altered
def main(argv=None):
    """Command-line entry point.

    argv -- argument list without the program name; defaults to
            sys.argv[1:].

    With no path arguments the program filters stdin to stdout.  Otherwise
    every file found by forester() is run through spdx_filter() into a
    temporary sibling file, and the original is overwritten only when the
    filter reports a change.  The temporary file is always removed.

    Raises SystemExit for -h/--help and -V/--version (status 0) and for
    unrecognized options (status 2).
    """
    global baton
    if argv is None:
        argv = sys.argv[1:]
    try:
        # "h"/"help" must be declared for the help branch below to be
        # reachable, and "exclude=" so the long form accepts its argument.
        (options, arguments) = getopt.getopt(
            argv, "hx:vV", ["help", "exclude=", "verbose", "version"])
    except getopt.GetoptError as err:
        sys.stderr.write("spdxify: %s\n" % err)
        sys.stderr.write(__doc__)
        raise SystemExit(2)
    exclusions = []
    verbose = False
    for (switch, val) in options:
        if switch in ('-h', '--help'):
            sys.stderr.write(__doc__)
            raise SystemExit(0)
        elif switch in ('-v', '--verbose'):
            verbose = True
        elif switch in ('-V', '--version'):
            sys.stdout.write("spdxify %s\n" % version)
            raise SystemExit(0)
        elif switch in ('-x', '--exclude'):
            exclusions.append(val)

    if not arguments:
        spdx_filter(sys.stdin, sys.stdout)
        return

    # Verbose mode lists file names, so the spinner is only used otherwise.
    baton = None if verbose else Baton("spdxify:", "done")
    for path in forester(arguments, exclusions):
        if verbose:
            sys.stdout.write(path + "\n")
        elif baton:
            baton.twirl()
        tagified = path + "." + str(os.getpid())
        try:
            # Close both streams before reading the result back so that
            # everything written is flushed to the temporary file.
            with open(path, "r") as rfp:
                with open(tagified, "w") as wfp:
                    changed = spdx_filter(rfp, wfp)
            if changed:
                # Copy contents back rather than renaming, so the original
                # file's identity (permissions, links) is left alone.
                with open(tagified) as rfp:
                    with open(path, "w") as wfp:
                        wfp.write(rfp.read())
        except (IOError, OSError) as err:
            # Report the file and keep going with the rest of the tree.
            sys.stderr.write("spdxify: %s: %s\n" % (path, err))
        finally:
            if os.path.exists(tagified):
                os.unlink(tagified)
    if baton:
        baton.end()


if __name__ == "__main__":
    main()
# end
_______________________________________________
Spdx-tech mailing list
[email protected]
https://lists.spdx.org/mailman/listinfo/spdx-tech