From: John Wright <[email protected]> The new module can parse, create, and edit DEP5-formatted debian/copyright files.
Currently it only parses the header paragraph, except for the License
field. Follow-up changes will add support for the License field and the
rest of the paragraphs.
---
 lib/debian/copyright.py | 230 +++++++++++++++++++++++++++++++++++++++
 tests/test_copyright.py | 277 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 507 insertions(+)
 create mode 100644 lib/debian/copyright.py
 create mode 100755 tests/test_copyright.py

diff --git a/lib/debian/copyright.py b/lib/debian/copyright.py
new file mode 100644
index 0000000..8315efc
--- /dev/null
+++ b/lib/debian/copyright.py
@@ -0,0 +1,230 @@
+# vim: fileencoding=utf-8
+#
+# Copyright (C) 2014 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+"""Utilities for parsing and creating machine-readable debian/copyright files.
+
+The specification for the format (also known as DEP5) is available here:
+https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+
+TODO(jsw): Add example usage.
+"""
+
+from __future__ import unicode_literals
+
+import collections
+import re
+import string
+import warnings
+
+from debian import deb822
+
+
+_CURRENT_FORMAT = (
+    'http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/')
+
+_KNOWN_FORMATS = frozenset([
+    _CURRENT_FORMAT,
+    # TODO(jsw): Transparently rewrite https:// as http://, at least for this?
+    'https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/',
+])
+
+
+class Error(Exception):
+    """Base class for exceptions in this module."""
+
+
+class NotMachineReadableError(Error):
+    """Raised when the input is not a machine-readable debian/copyright file."""
+
+
+class Copyright(object):
+    """Represents a debian/copyright file."""
+
+    def __init__(self, sequence=None, encoding='utf-8'):
+        """Initializer.
+
+        :param sequence: Sequence of lines, e.g. a list of strings or a
+            file-like object. If not specified, a blank Copyright object is
+            initialized.
+        :param encoding: Encoding to use, in case input is raw byte strings.
+            It is recommended to use unicode objects everywhere instead, e.g.
+            by opening files in text mode.
+
+        Raises:
+            NotMachineReadableError if 'sequence' does not contain a
+                machine-readable debian/copyright file.
+        """
+        super(Copyright, self).__init__()
+
+        if sequence is not None:
+            paragraphs = list(deb822.Deb822.iter_paragraphs(
+                sequence=sequence, encoding=encoding))
+            if not paragraphs:
+                # Fail here rather than leaving self.__header unassigned,
+                # which would surface later as an AttributeError.
+                raise NotMachineReadableError('no paragraphs in input')
+            self.__header = Header(paragraphs[0])
+            # TODO(jsw): Parse the rest of the paragraphs.
+        else:
+            self.__header = Header()
+
+    @property
+    def header(self):
+        """The file header paragraph."""
+        return self.__header
+
+    @header.setter
+    def header(self, hdr):
+        if not isinstance(hdr, Header):
+            raise TypeError('value must be a Header object')
+        self.__header = hdr
+
+
+def _single_line(s):
+    """Returns s if it is a single line; otherwise raises ValueError."""
+    if '\n' in s:
+        raise ValueError('must be single line')
+    return s
+
+
+class _LineBased(object):
+    """Namespace for conversion methods for line-based lists as tuples."""
+    # TODO(jsw): Expose this somewhere else? It may have more general utility.
+
+    @staticmethod
+    def from_str(s):
+        """Returns the lines in 's', with whitespace stripped, as a tuple."""
+        return tuple(v for v in
+                     (line.strip() for line in (s or '').strip().splitlines())
+                     if v)
+
+    @staticmethod
+    def to_str(seq):
+        """Returns the sequence as a string with each element on its own line.
+
+        If 'seq' has one element, the result will be on a single line.
+        Otherwise, the first line will be blank.
+        """
+        l = list(seq)
+        if not l:
+            return None
+
+        def process_and_validate(s):
+            s = s.strip()
+            if not s:
+                raise ValueError('values must not be empty')
+            if '\n' in s:
+                raise ValueError('values must not contain newlines')
+            return s
+
+        if len(l) == 1:
+            return process_and_validate(l[0])
+
+        tmp = ['']
+        for s in l:
+            tmp.append(' ' + process_and_validate(s))
+        return '\n'.join(tmp)
+
+
+class _SpaceSeparated(object):
+    """Namespace for conversion methods for space-separated lists as tuples."""
+    # TODO(jsw): Expose this somewhere else? It may have more general utility.
+
+    _has_space = re.compile(r'\s')
+
+    @staticmethod
+    def from_str(s):
+        """Returns the values in s as a tuple (empty if only whitespace)."""
+        return tuple(v for v in (s or '').split() if v)
+
+    @classmethod
+    def to_str(cls, seq):
+        """Returns the sequence as a space-separated string (None if empty)."""
+        l = list(seq)
+        if not l:
+            return None
+        tmp = []
+        for s in l:
+            if cls._has_space.search(s):
+                raise ValueError('values must not contain whitespace')
+            s = s.strip()
+            if not s:
+                raise ValueError('values must not be empty')
+            tmp.append(s)
+        return ' '.join(tmp)
+
+
+class Header(deb822.RestrictedWrapper):
+    """Represents the header paragraph of a debian/copyright file.
+
+    Property values are all immutable, such that in order to modify them you
+    must explicitly set them (rather than modifying a returned reference).
+    """
+
+    def __init__(self, data=None):
+        """Initializer.
+
+        :param data: A deb822.Deb822 object for underlying data. If None, a
+            new one will be created, with Format set to the current format.
+
+        A warning is issued if the format is not one of the known formats.
+
+        Raises:
+            NotMachineReadableError if the Format field is None.
+        """
+        if data is None:
+            data = deb822.Deb822()
+            data['Format'] = _CURRENT_FORMAT
+        super(Header, self).__init__(data)
+
+        fmt = self.format
+        if fmt is None:
+            raise NotMachineReadableError(
+                'input is not a machine-readable debian/copyright')
+        if fmt not in _KNOWN_FORMATS:
+            warnings.warn('format not known: %r' % fmt)
+
+    def known_format(self):
+        """Returns True iff the format is known."""
+        return self.format in _KNOWN_FORMATS
+
+    def current_format(self):
+        """Returns True iff the format is the current format."""
+        return self.format == _CURRENT_FORMAT
+
+    format = deb822.RestrictedField(
+        'Format', to_str=_single_line, allow_none=False)
+
+    upstream_name = deb822.RestrictedField(
+        'Upstream-Name', to_str=_single_line)
+
+    upstream_contact = deb822.RestrictedField(
+        'Upstream-Contact', from_str=_LineBased.from_str,
+        to_str=_LineBased.to_str)
+
+    source = deb822.RestrictedField('Source')
+
+    disclaimer = deb822.RestrictedField('Disclaimer')
+
+    comment = deb822.RestrictedField('Comment')
+
+    # TODO(jsw): Parse this.
+    license = deb822.RestrictedField(
+        'License', to_str=lambda _: None, from_str=lambda _: None)
+
+    copyright = deb822.RestrictedField('Copyright')
diff --git a/tests/test_copyright.py b/tests/test_copyright.py
new file mode 100755
index 0000000..129d57e
--- /dev/null
+++ b/tests/test_copyright.py
@@ -0,0 +1,277 @@
+#! /usr/bin/python
+## vim: fileencoding=utf-8
+
+# Copyright (C) 2014 Google, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+from __future__ import unicode_literals
+
+import sys
+import unittest
+
+sys.path.insert(0, '../lib/')
+
+from debian import copyright
+from debian import deb822
+
+
+SIMPLE = """\
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: X Solitaire
+Source: ftp://ftp.example.com/pub/games
+
+Files: *
+Copyright: Copyright 1998 John Doe <[email protected]>
+License: GPL-2+
+ This program is free software; you can redistribute it
+ and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ .
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE. See the GNU General Public License for more
+ details.
+ .
+ You should have received a copy of the GNU General Public
+ License along with this package; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ Boston, MA 02110-1301 USA
+ .
+ On Debian systems, the full text of the GNU General Public
+ License version 2 can be found in the file
+ `/usr/share/common-licenses/GPL-2'.
+
+Files: debian/*
+Copyright: Copyright 1998 Jane Smith <[email protected]>
+License: GPL-2+
+ [LICENSE TEXT]
+"""
+
+FORMAT = 'http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/'
+
+
+class LineBasedTest(unittest.TestCase):
+    """Tests for _LineBased.{to,from}_str."""
+
+    def setUp(self):
+        # Alias for less typing.
+        self.lb = copyright._LineBased
+
+    def test_from_str_none(self):
+        self.assertEqual((), self.lb.from_str(None))
+
+    def test_from_str_empty(self):
+        self.assertEqual((), self.lb.from_str(''))
+
+    def test_from_str_single_line(self):
+        self.assertEqual(
+            ('Foo Bar <[email protected]>',),
+            self.lb.from_str('Foo Bar <[email protected]>'))
+
+    def test_from_str_single_value_after_newline(self):
+        self.assertEqual(
+            ('Foo Bar <[email protected]>',),
+            self.lb.from_str('\n Foo Bar <[email protected]>'))
+
+    def test_from_str_multiline(self):
+        self.assertEqual(
+            ('Foo Bar <[email protected]>', 'http://bar.com/foo'),
+            self.lb.from_str('\n Foo Bar <[email protected]>\n http://bar.com/foo'))
+
+    def test_to_str_empty(self):
+        self.assertIsNone(self.lb.to_str([]))
+        self.assertIsNone(self.lb.to_str(()))
+
+    def test_to_str_single(self):
+        self.assertEqual(
+            'Foo Bar <[email protected]>',
+            self.lb.to_str(['Foo Bar <[email protected]>']))
+
+    def test_to_str_multi_list(self):
+        self.assertEqual(
+            '\n Foo Bar <[email protected]>\n http://bar.com/foo',
+            self.lb.to_str(
+                ['Foo Bar <[email protected]>', 'http://bar.com/foo']))
+
+    def test_to_str_multi_tuple(self):
+        self.assertEqual(
+            '\n Foo Bar <[email protected]>\n http://bar.com/foo',
+            self.lb.to_str(
+                ('Foo Bar <[email protected]>', 'http://bar.com/foo')))
+
+    def test_to_str_empty_value(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str(['foo', '', 'bar'])
+        self.assertEqual(('values must not be empty',), cm.exception.args)
+
+    def test_to_str_whitespace_only_value(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str(['foo', ' \t', 'bar'])
+        self.assertEqual(('values must not be empty',), cm.exception.args)
+
+    def test_to_str_elements_stripped(self):
+        self.assertEqual(
+            '\n Foo Bar <[email protected]>\n http://bar.com/foo',
+            self.lb.to_str(
+                (' Foo Bar <[email protected]>\t', ' http://bar.com/foo ')))
+
+    def test_to_str_newlines_single(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str([' Foo Bar <[email protected]>\n http://bar.com/foo '])
+        self.assertEqual(
+            ('values must not contain newlines',), cm.exception.args)
+
+    def test_to_str_newlines_multi(self):
+        with self.assertRaises(ValueError) as cm:
+            self.lb.to_str(
+                ['bar', ' Foo Bar <[email protected]>\n http://bar.com/foo '])
+        self.assertEqual(
+            ('values must not contain newlines',), cm.exception.args)
+
+
+class SpaceSeparatedTest(unittest.TestCase):
+    """Tests for _SpaceSeparated.{to,from}_str."""
+
+    def setUp(self):
+        # Alias for less typing.
+        self.ss = copyright._SpaceSeparated
+
+    def test_from_str_none(self):
+        self.assertEqual((), self.ss.from_str(None))
+
+    def test_from_str_empty(self):
+        self.assertEqual((), self.ss.from_str(' '))
+        self.assertEqual((), self.ss.from_str(''))
+
+    def test_from_str_single(self):
+        self.assertEqual(('foo',), self.ss.from_str('foo'))
+        self.assertEqual(('bar',), self.ss.from_str(' bar '))
+
+    def test_from_str_multi(self):
+        self.assertEqual(('foo', 'bar', 'baz'), self.ss.from_str('foo bar baz'))
+        self.assertEqual(
+            ('bar', 'baz', 'quux'), self.ss.from_str(' bar baz quux \t '))
+
+    def test_to_str_empty(self):
+        self.assertIsNone(self.ss.to_str([]))
+        self.assertIsNone(self.ss.to_str(()))
+
+    def test_to_str_single(self):
+        self.assertEqual('foo', self.ss.to_str(['foo']))
+
+    def test_to_str_multi(self):
+        self.assertEqual('foo bar baz', self.ss.to_str(['foo', 'bar', 'baz']))
+
+    def test_to_str_empty_value(self):
+        with self.assertRaises(ValueError) as cm:
+            self.ss.to_str(['foo', '', 'bar'])
+        self.assertEqual(('values must not be empty',), cm.exception.args)
+
+    def test_to_str_value_has_space_single(self):
+        with self.assertRaises(ValueError) as cm:
+            self.ss.to_str([' baz quux '])
+        self.assertEqual(
+            ('values must not contain whitespace',), cm.exception.args)
+
+    def test_to_str_value_has_space_multi(self):
+        with self.assertRaises(ValueError) as cm:
+            self.ss.to_str(['foo', ' baz quux '])
+        self.assertEqual(
+            ('values must not contain whitespace',), cm.exception.args)
+
+
+class CopyrightTest(unittest.TestCase):
+
+    def test_basic_parse_success(self):
+        c = copyright.Copyright(sequence=SIMPLE.splitlines())
+        self.assertEqual(FORMAT, c.header.format)
+        self.assertEqual(FORMAT, c.header['Format'])
+        self.assertEqual('X Solitaire', c.header.upstream_name)
+        self.assertEqual('X Solitaire', c.header['Upstream-Name'])
+        self.assertEqual('ftp://ftp.example.com/pub/games', c.header.source)
+        self.assertEqual('ftp://ftp.example.com/pub/games', c.header['Source'])
+        self.assertIsNone(c.header.license)
+
+    def test_empty_input_raises(self):
+        with self.assertRaises(copyright.NotMachineReadableError):
+            copyright.Copyright(sequence=[])
+
+
+class HeaderTest(unittest.TestCase):
+
+    def test_format_not_none(self):
+        h = copyright.Header()
+        self.assertEqual(FORMAT, h.format)
+        with self.assertRaises(TypeError) as cm:
+            h.format = None
+        self.assertEqual(('value must not be None',), cm.exception.args)
+
+    def test_upstream_name_single_line(self):
+        h = copyright.Header()
+        h.upstream_name = 'Foo Bar'
+        self.assertEqual('Foo Bar', h.upstream_name)
+        with self.assertRaises(ValueError) as cm:
+            h.upstream_name = 'Foo Bar\n Baz'
+        self.assertEqual(('must be single line',), cm.exception.args)
+
+    def test_upstream_contact_single_read(self):
+        data = deb822.Deb822()
+        data['Format'] = FORMAT
+        data['Upstream-Contact'] = 'Foo Bar <[email protected]>'
+        h = copyright.Header(data=data)
+        self.assertEqual(('Foo Bar <[email protected]>',), h.upstream_contact)
+
+    def test_upstream_contact_multi1_read(self):
+        data = deb822.Deb822()
+        data['Format'] = FORMAT
+        data['Upstream-Contact'] = 'Foo Bar <[email protected]>\n http://bar.com/foo'
+        h = copyright.Header(data=data)
+        self.assertEqual(
+            ('Foo Bar <[email protected]>', 'http://bar.com/foo'),
+            h.upstream_contact)
+
+    def test_upstream_contact_multi2_read(self):
+        data = deb822.Deb822()
+        data['Format'] = FORMAT
+        data['Upstream-Contact'] = (
+            '\n Foo Bar <[email protected]>\n http://bar.com/foo')
+        h = copyright.Header(data=data)
+        self.assertEqual(
+            ('Foo Bar <[email protected]>', 'http://bar.com/foo'),
+            h.upstream_contact)
+
+    def test_upstream_contact_single_write(self):
+        h = copyright.Header()
+        h.upstream_contact = ['Foo Bar <[email protected]>']
+        self.assertEqual(('Foo Bar <[email protected]>',), h.upstream_contact)
+        self.assertEqual('Foo Bar <[email protected]>', h['Upstream-Contact'])
+
+    def test_upstream_contact_multi_write(self):
+        h = copyright.Header()
+        h.upstream_contact = ['Foo Bar <[email protected]>', 'http://bar.com/foo']
+        self.assertEqual(
+            ('Foo Bar <[email protected]>', 'http://bar.com/foo'),
+            h.upstream_contact)
+        self.assertEqual(
+            '\n Foo Bar <[email protected]>\n http://bar.com/foo',
+            h['upstream-contact'])
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
2.1.0

-- 
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-python-debian-maint
