On 03-May-2008, Piotr Ożarowski wrote:
> [Ben Finney, 2008-05-03 15:22]
> > I wanted a command-line tool using 'python-chardet' to report on the
> > detected character encoding of arbitrary specified files. So I wrote
> > one.
>
> I didn't add such tool to python-chardet package, because /usr/bin/enca
> from enca package does all this and even more.

Perhaps, but it doesn't use 'python-chardet' to do so, leaving Debian
currently without a command that uses this library.

> > The patch 'chardet-1.0.1_unit-test-scaffold.patch' adds unit test
> > support modules that I used to develop the program, and may be
> > useful for adding more unit tests in future.
>
> you forgot to attach the patch :-)

Argh. I didn't forget; the patches were eaten by bug #211808 of
'reportbug' <URL:http://bugs.debian.org/211808>. Thanks for letting me
know.

> please attach it and we'll consider adding it to the package

Both patches discussed in the initial bug report are now attached to
this message.

-- 
 \     "I don't care to belong to a club that accepts people like me |
  `\                                as members."  -- Groucho Marx |
_o__)                                                              |
Ben Finney <[EMAIL PROTECTED]>

=== added directory 'test' === added file 'test/minimock.py' --- test/minimock.py 1970-01-01 00:00:00 +0000 +++ test/minimock.py 2008-04-30 03:30:45 +0000 @@ -0,0 +1,279 @@ +# (c) 2006 Ian Bicking, Mike Beachy, and contributors +# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php +r""" +minimock is a simple library for doing Mock objects with doctest. +When using doctest, mock objects can be very simple. + +Here's an example of something we might test, a simple email sender:: + + >>> import smtplib + >>> def send_email(from_addr, to_addr, subject, body): + ... conn = smtplib.SMTP('localhost') + ... msg = 'To: %s\nFrom: %s\nSubject: %s\n\n%s' % ( + ... to_addr, from_addr, subject, body) + ... conn.sendmail(from_addr, [to_addr], msg) + ... conn.quit() + +Now we want to make a mock ``smtplib.SMTP`` object. We'll have to +inject our mock into the ``smtplib`` module:: + + >>> smtplib.SMTP = Mock('smtplib.SMTP') + >>> smtplib.SMTP.mock_returns = Mock('smtp_connection') + +Now we do the test:: + + >>> send_email('[EMAIL PROTECTED]', '[EMAIL PROTECTED]', + ... 'Hi there!', 'How is it going?') + Called smtplib.SMTP('localhost') + Called smtp_connection.sendmail( + '[EMAIL PROTECTED]', + ['[EMAIL PROTECTED]'], + 'To: [EMAIL PROTECTED]: [EMAIL PROTECTED]: Hi there!\n\nHow is it going?') + Called smtp_connection.quit() + +Voila! We've tested implicitly that no unexpected methods were called +on the object. We've also tested the arguments that the mock object +got. We've provided fake return calls (for the ``smtplib.SMTP()`` +constructor). These are all the core parts of a mock library. The +implementation is simple because most of the work is done by doctest. +""" + +__all__ = ["mock", "restore", "Mock"] + +import sys +import inspect + +# A list of mocked objects. Each item is a tuple of (original object, +# namespace dict, object name, and a list of object attributes). 
+# +mocked = [] + +def lookup_by_name(name, nsdicts): + """ + Look up an object by name from a sequence of namespace dictionaries. + Returns a tuple of (nsdict, object, attributes); nsdict is the + dictionary the name was found in, object is the base object the name is + bound to, and the attributes list is the chain of attributes of the + object that complete the name. + + >>> import os + >>> nsdict, name, attributes = lookup_by_name("os.path.isdir", + ... (locals(),)) + >>> name, attributes + ('os', ['path', 'isdir']) + >>> nsdict, name, attributes = lookup_by_name("os.monkey", (locals(),)) + Traceback (most recent call last): + ... + NameError: name 'os.monkey' is not defined + + """ + for nsdict in nsdicts: + attrs = name.split(".") + names = [] + + while attrs: + names.append(attrs.pop(0)) + obj_name = ".".join(names) + + if obj_name in nsdict: + attr_copy = attrs[:] + tmp = nsdict[obj_name] + try: + while attr_copy: + tmp = getattr(tmp, attr_copy.pop(0)) + except AttributeError: + pass + else: + return nsdict, obj_name, attrs + + raise NameError("name '%s' is not defined" % name) + +def mock(name, nsdicts=None, mock_obj=None, **kw): + """ + Mock the named object, placing a Mock instance in the correct namespace + dictionary. If no iterable of namespace dicts is provided, use + introspection to get the locals and globals of the caller of this + function. + + All additional keyword args are passed on to the Mock object + initializer. + + An example of how os.path.isfile is replaced: + + >>> import os + >>> os.path.isfile + <function isfile at ...> + >>> isfile_id = id(os.path.isfile) + >>> mock("os.path.isfile", returns=True) + >>> os.path.isfile + <Mock ... os.path.isfile> + >>> os.path.isfile("/foo/bar/baz") + Called os.path.isfile('/foo/bar/baz') + True + >>> mock_id = id(os.path.isfile) + >>> mock_id != isfile_id + True + + A second mock object will replace the first, but the original object + will be the one replaced with the replace() function. 
+ + >>> mock("os.path.isfile", returns=False) + >>> mock_id != id(os.path.isfile) + True + >>> restore() + >>> os.path.isfile + <function isfile at ...> + >>> isfile_id == id(os.path.isfile) + True + + """ + if nsdicts is None: + stack = inspect.stack() + try: + # stack[1][0] is the frame object of the caller to this function + globals_ = stack[1][0].f_globals + locals_ = stack[1][0].f_locals + nsdicts = (locals_, globals_) + finally: + del(stack) + + if mock_obj is None: + mock_obj = Mock(name, **kw) + + nsdict, obj_name, attrs = lookup_by_name(name, nsdicts) + + # Get the original object and replace it with the mock object. + tmp = nsdict[obj_name] + if not attrs: + original = tmp + nsdict[obj_name] = mock_obj + else: + for attr in attrs[:-1]: + tmp = getattr(tmp, attr) + original = getattr(tmp, attrs[-1]) + setattr(tmp, attrs[-1], mock_obj) + + mocked.append((original, nsdict, obj_name, attrs)) + +def restore(): + """ + Restore all mocked objects. + + """ + global mocked + + # Restore the objects in the reverse order of their mocking to assure + # the original state is retrieved. 
+ while mocked: + original, nsdict, name, attrs = mocked.pop() + if not attrs: + nsdict[name] = original + else: + tmp = nsdict[name] + for attr in attrs[:-1]: + tmp = getattr(tmp, attr) + setattr(tmp, attrs[-1], original) + return + +class Mock(object): + + def __init__(self, name, returns=None, returns_iter=None, + returns_func=None, raises=None, + outfile=None): + self.mock_name = name + self.mock_returns = returns + if returns_iter is not None: + returns_iter = iter(returns_iter) + self.mock_returns_iter = returns_iter + self.mock_returns_func = returns_func + self.mock_raises = raises + if outfile is None: + outfile = sys.stdout + self.mock_outfile = outfile + self.mock_attrs = {} + + def __repr__(self): + return '<Mock %s %s>' % (hex(id(self)), self.mock_name) + + def __call__(self, *args, **kw): + parts = [repr(a) for a in args] + parts.extend( + '%s=%r' % (items) for items in sorted(kw.items())) + msg = 'Called %s(%s)' % (self.mock_name, ', '.join(parts)) + if len(msg) > 80: + msg = 'Called %s(\n %s)' % ( + self.mock_name, ',\n '.join(parts)) + self.mock_outfile.write("%s\n" % msg) + return self._mock_return(*args, **kw) + + def _mock_return(self, *args, **kw): + if self.mock_raises is not None: + raise self.mock_raises + elif self.mock_returns is not None: + return self.mock_returns + elif self.mock_returns_iter is not None: + try: + return self.mock_returns_iter.next() + except StopIteration: + raise Exception("No more mock return values are present.") + elif self.mock_returns_func is not None: + return self.mock_returns_func(*args, **kw) + else: + return None + + def __getattr__(self, attr): + if attr not in self.mock_attrs: + if self.mock_name: + new_name = self.mock_name + '.' + attr + else: + new_name = attr + self.mock_attrs[attr] = Mock(new_name, + outfile=self.mock_outfile) + return self.mock_attrs[attr] + +__test__ = { + "mock" : + r""" + An additional test for mocking a function accessed directly (i.e. + not via object attributes). 
+ + >>> import os + >>> rename = os.rename + >>> orig_id = id(rename) + >>> mock("rename") + >>> mock_id = id(rename) + >>> mock("rename") + >>> mock_id != id(rename) + True + >>> restore() + >>> orig_id == id(rename) == id(os.rename) + True + + The example from the module docstring, done with the mock/restore + functions. + + >>> import smtplib + >>> def send_email(from_addr, to_addr, subject, body): + ... conn = smtplib.SMTP('localhost') + ... msg = 'To: %s\nFrom: %s\nSubject: %s\n\n%s' % ( + ... to_addr, from_addr, subject, body) + ... conn.sendmail(from_addr, [to_addr], msg) + ... conn.quit() + + >>> mock("smtplib.SMTP", returns=Mock('smtp_connection')) + >>> send_email('[EMAIL PROTECTED]', '[EMAIL PROTECTED]', + ... 'Hi there!', 'How is it going?') + Called smtplib.SMTP('localhost') + Called smtp_connection.sendmail( + '[EMAIL PROTECTED]', + ['[EMAIL PROTECTED]'], + 'To: [EMAIL PROTECTED]: [EMAIL PROTECTED]: Hi there!\n\nHow is it going?') + Called smtp_connection.quit() + >>> restore() + + """, +} + +if __name__ == '__main__': + import doctest + doctest.testmod(optionflags=doctest.ELLIPSIS) === added file 'test/scaffold.py' --- test/scaffold.py 1970-01-01 00:00:00 +0000 +++ test/scaffold.py 2008-04-30 03:47:46 +0000 @@ -0,0 +1,368 @@ +# -*- coding: utf-8 -*- + +# scaffold.py +# +# Copyright © 2007-2008 Ben Finney <[EMAIL PROTECTED]> +# This is free software; you may copy, modify and/or distribute this work +# under the terms of the GNU General Public License, version 2 or later. +# No warranty expressed or implied. See the file LICENSE for details. 
+ +""" Scaffolding for unit test modules +""" + +import unittest +import doctest +import logging +import os +import sys +import textwrap +from StringIO import StringIO +from minimock import ( + Mock, + mock, + restore as mock_restore, + ) + +test_dir = os.path.dirname(os.path.abspath(__file__)) +parent_dir = os.path.dirname(test_dir) +if not test_dir in sys.path: + sys.path.insert(1, test_dir) +if not parent_dir in sys.path: + sys.path.insert(1, parent_dir) +bin_dir = os.path.join(parent_dir, "bin") + +# Disable all but the most critical logging messages +logging.disable(logging.CRITICAL) + + +def suite(module_name): + """ Create the test suite for named module """ + from sys import modules + loader = unittest.TestLoader() + suite = loader.loadTestsFromModule(modules[module_name]) + return suite + +def unittest_main(argv=None): + """ Mainline function for each unit test module """ + + from sys import argv as sys_argv + if not argv: + argv = sys_argv + + exitcode = None + try: + unittest.main(argv=argv, defaultTest='suite') + except SystemExit, e: + exitcode = e.code + + return exitcode + + +def make_module_from_file(module_name, file_name): + """ Make a new module object from the code in specified file """ + + from types import ModuleType + module = ModuleType(module_name) + + module_file = open(file_name, 'r') + exec module_file in module.__dict__ + + return module + + +class TestCase(unittest.TestCase): + """ Test case behaviour """ + + def failUnlessRaises(self, exc_class, func, *args, **kwargs): + """ Fail unless the function call raises the expected exception + + Fail the test if an instance of the exception class + ``exc_class`` is not raised when calling ``func`` with the + arguments ``*args`` and ``**kwargs``. 
+ + """ + + try: + super(TestCase, self).failUnlessRaises( + exc_class, func, *args, **kwargs) + except self.failureException: + exc_class_name = exc_class.__name__ + msg = ( + "Exception %(exc_class_name)s not raised" + " for function call:" + " func=%(func)r args=%(args)r kwargs=%(kwargs)r" + ) % vars() + raise self.failureException(msg) + + + def failIfIs(self, first, second, msg=None): + """ Fail if the two objects are identical + + Fail the test if ``first`` and ``second`` are identical, + as determined by the ``is`` operator. + + """ + + if first is second: + if msg is None: + msg = "%(first)r is %(second)r" % vars() + raise self.failureException(msg) + + def failUnlessIs(self, first, second, msg=None): + """ Fail unless the two objects are identical + + Fail the test unless ``first`` and ``second`` are + identical, as determined by the ``is`` operator. + + """ + + if first is not second: + if msg is None: + msg = "%(first)r is not %(second)r" % vars() + raise self.failureException(msg) + + assertIs = failUnlessIs + assertNotIs = failIfIs + + def failIfIn(self, first, second, msg=None): + """ Fail if the second object is in the first + + Fail the test if ``first`` contains ``second``, as + determined by the ``in`` operator. + + """ + + if second in first: + if msg is None: + msg = "%(second)r is in %(first)r" % vars() + raise self.failureException(msg) + + def failUnlessIn(self, first, second, msg=None): + """ Fail unless the second object is in the first + + Fail the test unless ``first`` contains ``second``, as + determined by the ``in`` operator. 
+ + """ + + if second not in first: + if msg is None: + msg = "%(second)r is not in %(first)r" % vars() + raise self.failureException(msg) + + assertIn = failUnlessIn + assertNotIn = failIfIn + + def failUnlessOutputCheckerMatch(self, want, got, msg=None): + """ Fail unless the specified string matches the expected + + Fail the test unless ``want`` matches ``got``, as + determined by a ``doctest.OutputChecker`` instance. This + is not an equality check, but a pattern match according to + the OutputChecker rules. + + """ + + checker = doctest.OutputChecker() + want = textwrap.dedent(want) + got = textwrap.dedent(got) + if not checker.check_output(want, got, doctest.ELLIPSIS): + if msg is None: + msg = ("Expected %(want)r, got %(got)r:" + "\n--- want: ---\n%(want)s" + "\n--- got: ---\n%(got)s") % vars() + raise self.failureException(msg) + + assertOutputCheckerMatch = failUnlessOutputCheckerMatch + + def failIfIsInstance(self, obj, classes): + """ Fail if the object is an instance of the specified classes + + Fail the test if the object ``obj`` is an instance of any + of ``classes``. + + """ + + if isinstance(obj, classes): + msg = "%(obj)r is an instance of one of %(classes)r" % vars() + raise self.failureException(msg) + + def failUnlessIsInstance(self, obj, classes): + """ Fail unless the object is an instance of the specified classes + + Fail the test unless the object ``obj`` is an instance of + any of ``classes``. + + """ + + if not isinstance(obj, classes): + msg = "%(obj)r is not an instance of any of %(classes)r" % vars() + raise self.failureException(msg) + + assertIsInstance = failUnlessIsInstance + assertNotIsInstance = failIfIsInstance + + def failUnlessFunctionInTraceback(self, traceback, function): + """ Fail if the function is not in the traceback + + Fail the test if the function ``function`` is not at any + of the levels in the traceback object ``traceback``. 
+ + """ + + func_in_traceback = False + expect_code = function.func_code + current_traceback = traceback + while current_traceback is not None: + if expect_code is current_traceback.tb_frame.f_code: + func_in_traceback = True + break + current_traceback = current_traceback.tb_next + + if not func_in_traceback: + msg = ("Traceback did not lead to original function" + " %(function)s" + ) % vars() + raise self.failureException(msg) + + assertFunctionInTraceback = failUnlessFunctionInTraceback + + +class Test_Exception(TestCase): + """ Test cases for exception classes """ + + def __init__(self, *args, **kwargs): + """ Set up a new instance """ + self.valid_exceptions = NotImplemented + super(Test_Exception, self).__init__(*args, **kwargs) + + def setUp(self): + """ Set up test fixtures """ + for exc_type, params in self.valid_exceptions.items(): + args = (None,) * params['min_args'] + params['args'] = args + instance = exc_type(*args) + params['instance'] = instance + + self.iterate_params = make_params_iterator( + default_params_dict = self.valid_exceptions + ) + + super(Test_Exception, self).setUp() + + def test_exception_instance(self): + """ Exception instance should be created """ + for key, params in self.iterate_params(): + instance = params['instance'] + self.failIfIs(None, instance) + + def test_exception_types(self): + """ Exception instances should match expected types """ + for key, params in self.iterate_params(): + instance = params['instance'] + for match_type in params['types']: + match_type_name = match_type.__name__ + fail_msg = ( + "%(instance)r is not an instance of" + " %(match_type_name)s" + ) % vars() + self.failUnless( + isinstance(instance, match_type), + msg=fail_msg) + + +class Test_ProgramMain(TestCase): + """ Test cases for program __main__ function + + Tests a module-level function named __main__ with behaviour + inspired by Guido van Rossum's post "Python main() functions" + <URL:http://www.artima.com/weblogs/viewpost.jsp?thread=4829>. 
+ + It expects: + * the program module has a __main__ function, that: + * accepts an 'argv' argument, defaulting to sys.argv + * instantiates a program application class + * calls the application's main() method, passing argv + * catches SystemExit and returns the error code + * the application behaviour is defined in a class, that: + * has an __init__() method accepting an 'argv' argument as + the commandline argument list to parse + * has a main() method responsible for running the program, + and returning on successful program completion + * raises SystemExit when an abnormal exit is required + """ + + def __init__(self, *args, **kwargs): + """ Set up a new instance """ + self.program_module = NotImplemented + self.application_class = NotImplemented + super(Test_ProgramMain, self).__init__(*args, **kwargs) + + def setUp(self): + """ Set up test fixtures """ + self.mock_outfile = StringIO() + + self.app_class_name = self.application_class.__name__ + self.mock_app = Mock("test_app", outfile=self.mock_outfile) + self.mock_app_class = Mock(self.app_class_name, + outfile=self.mock_outfile) + self.mock_app_class.mock_returns = self.mock_app + mock(self.app_class_name, mock_obj=self.mock_app_class, + nsdicts=[self.program_module.__dict__]) + + super(Test_ProgramMain, self).setUp() + + def tearDown(self): + """ Tear down test fixtures """ + mock_restore() + super(Test_ProgramMain, self).tearDown() + + def test_main_should_instantiate_app(self): + """ __main__() should instantiate application class """ + app_class_name = self.app_class_name + argv = ["foo", "bar"] + expect_mock_output = """\ + Called %(app_class_name)s(%(argv)r)... 
+ """ % vars() + self.program_module.__main__(argv) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue()) + + def test_main_should_call_app_main(self): + """ __main__() should call the application main method """ + argv = ["foo", "bar"] + app_class_name = self.app_class_name + expect_mock_output = """\ + Called %(app_class_name)s(%(argv)r) + Called test_app.main() + """ % vars() + self.program_module.__main__(argv) + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue()) + + def test_main_no_argv_should_supply_sys_argv(self): + """ __main__() with no argv should supply sys.argv to application """ + sys_argv_test = ["foo", "bar"] + mock("sys.argv", mock_obj=sys_argv_test) + app_class_name = self.app_class_name + expect_mock_output = """\ + Called %(app_class_name)s(%(sys_argv_test)r) + Called test_app.main() + """ % vars() + self.program_module.__main__() + self.failUnlessOutputCheckerMatch( + expect_mock_output, self.mock_outfile.getvalue()) + + def test_main_should_return_none_on_success(self): + """ __main__() should return None when no SystemExit raised """ + expect_exit_code = None + exit_code = self.program_module.__main__() + self.failUnlessEqual(expect_exit_code, exit_code) + + def test_main_should_return_exit_code_on_system_exit(self): + """ __main__() should return application SystemExit code """ + expect_exit_code = object() + self.mock_app.main.mock_raises = SystemExit(expect_exit_code) + exit_code = self.program_module.__main__() + self.failUnlessEqual(expect_exit_code, exit_code) + === added file 'test/suite.py' --- test/suite.py 1970-01-01 00:00:00 +0000 +++ test/suite.py 2008-04-30 03:08:08 +0000 @@ -0,0 +1,59 @@ +# -*- coding:utf-8; -*- + +# test/suite.py +# Part of chardet, the Universal Encoding Detector. 
+#
+# Copyright © 2008 Ben Finney <[EMAIL PROTECTED]>
+# This is free software; you may copy, modify and/or distribute this work
+# under the terms of the GNU General Public License, version 2 or later.
+# No warranty expressed or implied. See the file COPYING for details.
+
+""" Unit test suite for chardet
+"""
+
+import unittest
+import sys
+import os
+
+
+def get_python_modules(file_list, file_suffix = '.py'):
+    """ Return a list of module names from a filename list
+
+    Only names ending with `file_suffix` are kept, and the suffix is
+    removed from each of them.
+
+    """
+    python_modules = [m[:m.rfind(file_suffix)] for m in file_list
+        if m.endswith(file_suffix)]
+    return python_modules
+
+def get_test_modules(module_list, module_prefix = 'test_'):
+    """ Return the list of modules that are named as test modules
+
+    A module counts as a test module when its name starts with
+    `module_prefix`.
+
+    """
+    test_modules = [m for m in module_list
+        if m.startswith(module_prefix)]
+    return test_modules
+
+
+def suite():
+    """ Create the test suite for this module
+
+    Loads, by name, every 'test_*.py' module found in the directory
+    that holds this file.
+
+    """
+    loader = unittest.TestLoader()
+    # Make the path absolute first: when this file is run from its own
+    # directory, __file__ has no directory part and dirname() alone
+    # would give '', which os.listdir() rejects.
+    test_dir = os.path.dirname(os.path.abspath(__file__))
+    python_modules = get_python_modules(os.listdir(test_dir))
+    module_list = get_test_modules(python_modules)
+    suite = loader.loadTestsFromNames(module_list)
+
+    return suite
+
+
+def __main__(argv=None):
+    """ Mainline function for this module
+
+    Runs `unittest.main` with 'suite' as the default test, using
+    `argv` (or `sys.argv` when `argv` is empty or None).
+
+    Returns the `code` of the SystemExit raised by `unittest.main`,
+    or None if none was raised.
+
+    """
+    from sys import argv as sys_argv
+    if not argv:
+        argv = sys_argv
+
+    exitcode = None
+    try:
+        unittest.main(argv=argv, defaultTest='suite')
+    except SystemExit, e:
+        exitcode = e.code
+
+    return exitcode
+
+if __name__ == '__main__':
+    exitcode = __main__(sys.argv)
+    sys.exit(exitcode)
=== added directory 'bin'
=== added file 'bin/detect-encoding'
--- bin/detect-encoding 1970-01-01 00:00:00 +0000
+++ bin/detect-encoding 2008-05-03 05:13:33 +0000
@@ -0,0 +1,159 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# bin/detect-encoding
+# Part of chardet, the Universal Encoding Detector.
+#
+# Copyright © 2008 Ben Finney <[EMAIL PROTECTED]>
+# This is free software; you may copy, modify and/or distribute this work
+# under the terms of the GNU General Public License, version 2 or later.
+# No warranty expressed or implied. See the file COPYING for details.
+
+""" %prog [options] [file ...]
+
+Report heuristically-detected character encoding for each file.
+
+For every specified file (defaulting to stdin if no files are
+specified), reads and determines the character encoding of the file
+content. Reports the name and confidence level for each file's
+detected character encoding.
+"""
+
+import sys
+import optparse
+import chardet
+
+
+class OptionParser(optparse.OptionParser, object):
+    """ Option parser that takes its usage text from the module docstring """
+
+    def __init__(self, *args, **kwargs):
+        """ Initialise the base parser, then replace the usage message
+
+        All arguments are handed unchanged to the base class
+        initialiser. Afterwards `usage` is set to the stripped module
+        docstring, superseding any `usage` argument.
+
+        """
+        super(OptionParser, self).__init__(*args, **kwargs)
+
+        # __doc__ here is the module-level name; reading it needs no
+        # `global` declaration.
+        usage_text = __doc__.strip()
+        self.usage = usage_text
+
+
+def detect_encoding(in_file):
+    """ Read `in_file` and ask chardet for the encoding of its content
+
+    Parameters
+        in_file
+            Opened file object. Its `read` method is called once,
+            with no arguments.
+
+    Return value
+        Whatever `chardet.detect` returns for the data read.
+
+    """
+    return chardet.detect(in_file.read())
+
+
+def report_file_encoding(in_file, encoding_params):
+    """ Format the one-line encoding report for a file
+
+    Parameters
+        in_file
+            Object whose `name` attribute is shown as the file name.
+
+        encoding_params
+            Mapping with an 'encoding' item and a numeric
+            'confidence' item.
+
+    Return value
+        Text of the form '<name>: <encoding> (confidence: <n.nn>)',
+        with the confidence shown to two decimal places and no
+        trailing newline.
+
+    """
+    fields = dict(
+        file_name=in_file.name,
+        encoding_name=encoding_params['encoding'],
+        confidence=encoding_params['confidence'],
+        )
+    return (
+        "%(file_name)s: %(encoding_name)s"
+        " (confidence: %(confidence)0.2f)"
+        ) % fields
+
+
+def process_file(in_file):
+    """ Detect and report the encoding of a single file
+
+    Parameters
+        in_file
+            Opened file object; passed first to `detect_encoding`
+            and then to `report_file_encoding`.
+
+    Return value
+        None. The report text, followed by a newline, is written
+        to `sys.stdout`.
+
+    """
+    params = detect_encoding(in_file)
+    report_line = report_file_encoding(in_file, params)
+    message = "%(encoding_report)s\n" % dict(encoding_report=report_line)
+    sys.stdout.write(message)
+
+
+class DetectEncodingApp(object):
+    """ Application behaviour for 'detect-encoding' program """
+
+    def __init__(self, argv):
+        """ Set up a new instance
+
+        Parameters
+            argv
+                Command-line argument list. The first item is
+                skipped when the options are parsed.
+
+        """
+        self._parse_commandline(argv)
+
+    def _parse_commandline(self, argv):
+        """ Parse command-line arguments
+
+        The positional arguments left over after option parsing are
+        stored as `self.file_names`.
+
+        """
+        option_parser = OptionParser()
+        (options, args) = option_parser.parse_args(argv[1:])
+        self.file_names = args
+
+    def _emit_file_error(self, file_name, error):
+        """ Emit an error message regarding file processing
+
+        Writes one '<file name>: <exception class>: <message>' line
+        to stderr.
+
+        """
+        error_name = error.__class__.__name__
+        message = (
+            "%(file_name)s: %(error_name)s: %(error)s\n"
+            ) % vars()
+        sys.stderr.write(message)
+
+    def _process_all_files(self, file_names):
+        """ Process all files in list
+
+        Parameters
+            file_names
+                Sequence of names of files to open and report on.
+                When it is empty, stdin is processed instead.
+
+        An IOError for one file is reported on stderr and the
+        remaining files are still processed. Each file opened here
+        is closed once it has been processed; stdin is left open.
+
+        """
+        if not file_names:
+            file_names = [None]
+        for file_name in file_names:
+            try:
+                if file_name is None:
+                    # Rebind so an error report names the stream.
+                    file_name = sys.stdin.name
+                    process_file(sys.stdin)
+                else:
+                    # NOTE(review): opened in text mode, as before;
+                    # confirm whether the detector should be given
+                    # the raw bytes ('rb') instead.
+                    in_file = open(file_name)
+                    try:
+                        process_file(in_file)
+                    finally:
+                        # Release the handle now rather than when it
+                        # happens to be garbage-collected.
+                        in_file.close()
+            except IOError, exc:
+                self._emit_file_error(file_name, exc)
+
+    def main(self):
+        """ Main entry point for application """
+        self._process_all_files(self.file_names)
+
+
+def __main__(argv=None):
+    """ Mainline code for this program
+
+    Parameters
+        argv
+            Command-line argument list. When None, the current
+            value of `sys.argv` is used.
+
+    Return value
+        None if the application finishes without raising
+        SystemExit; otherwise the `code` of the SystemExit raised
+        while setting up or running the application.
+
+    """
+
+    # Imported at call time, so the value of sys.argv at the moment of
+    # the call is the one used.
+    from sys import argv as sys_argv
+    if argv is None:
+        argv = sys_argv
+
+    exitcode = None
+    try:
+        # The initialiser parses the command line, and optparse exits
+        # via SystemExit for '--help' and for usage errors; construct
+        # the application inside the handler so that case is turned
+        # into a return code as well.
+        app = DetectEncodingApp(argv)
+        app.main()
+    except SystemExit, e:
+        exitcode = e.code
+
+    return exitcode
+
+# Run as a script: the return value of __main__ becomes the exit status.
+if __name__ == "__main__":
+ exitcode = __main__(argv=sys.argv)
+ sys.exit(exitcode)
=== added file 'test/test_detect_encoding.py'
--- test/test_detect_encoding.py 1970-01-01 00:00:00 +0000
+++ test/test_detect_encoding.py 2008-05-03 05:09:39 +0000
@@ -0,0 +1,558 @@
+# -*- coding: utf-8; -*-
+
+# test/test_detect_encoding.py
+# Part of chardet, the Universal Encoding Detector.
+#
+# Copyright © 2008 Ben Finney <[EMAIL PROTECTED]>
+# This is free software; you may copy, modify and/or distribute this work
+# under the terms of the GNU General Public License, version 2 or later.
+# No warranty expressed or implied. See the file COPYING for details.
+
+""" Unit test suite for the 'detect-encoding' command-line program
+"""
+
+import __builtin__
+import sys
+import os
+import stat
+from StringIO import StringIO
+
+import scaffold
+from scaffold import TestCase
+from minimock import (
+ Mock,
+ mock,
+ restore as mock_restore,
+ )
+import chardet
+
+
+# The program under test is loaded by file path rather than imported:
+# scaffold.make_module_from_file runs the file's code in a new module
+# object named `module_name`.
+module_name = 'detect_encoding'
+module_file_under_test = os.path.join(scaffold.bin_dir, 'detect-encoding')
+detect_encoding = scaffold.make_module_from_file(
+ module_name, module_file_under_test
+ )
+
+class Test_ProgramFile(TestCase):
+    """ Test cases for the program file itself """
+
+    def test_program_is_executable(self):
+        """ Program file mode should have the `stat.S_IEXEC` bit set """
+        mode_bits = os.stat(module_file_under_test).st_mode
+        wanted_bit = stat.S_IEXEC
+        self.failUnlessEqual(wanted_bit, mode_bits & wanted_bit)
+
+
+class Test_OptionParser(TestCase):
+    """ Test cases for OptionParser class """
+
+    def setUp(self):
+        """ Create a fresh parser instance for each test """
+        self.instance = detect_encoding.OptionParser()
+
+    def test_usage_contains_module_docstring(self):
+        """ Usage message should contain the module docstring
+
+        The program's usage text is meant to come from its module
+        docstring, instead of being written out a second time.
+
+        """
+        expect_text = detect_encoding.__doc__.strip()
+        self.failUnlessIn(self.instance.usage, expect_text)
+
+    def test_usage_contains_program_name_placeholder(self):
+        """ Usage message should contain program name placeholder
+
+        When the usage message is generated, '%prog' is replaced
+        with the name of the running program, so that placeholder
+        has to be present in the usage string.
+
+        """
+        self.failUnlessIn(self.instance.usage, "%prog")
+
+
+def setup_file_encoding_fixture(testcase):
+    """ Attach fake file parameters to `testcase`
+
+    Sets `testcase.valid_file_params`, a mapping from file name to a
+    dict with these items:
+
+    * 'encoding', 'confidence': the fake detection result
+    * 'file_name': the file name (same as the key)
+    * 'instance': an empty StringIO with `name` set to the file name
+    * 'encoding_params': a dict holding the same 'encoding' and
+      'confidence' values
+
+    """
+    detection_results = [
+        ("foo.txt", "spam.eggs", 1.0),
+        ("bar", "beans.tofu", 0.333333333),
+        ("baz.dat", "bacon.muffin", 0.666666667),
+        ]
+
+    valid_file_params = {}
+    for (file_name, encoding, confidence) in detection_results:
+        fake_file = StringIO()
+        fake_file.name = file_name
+        valid_file_params[file_name] = dict(
+            encoding=encoding,
+            confidence=confidence,
+            file_name=file_name,
+            instance=fake_file,
+            encoding_params=dict(
+                encoding=encoding,
+                confidence=confidence,
+                ),
+            )
+    testcase.valid_file_params = valid_file_params
+
+
+class Test_detect_encoding(TestCase):
+    """ Test cases for ``detect_encoding`` function """
+
+    def setUp(self):
+        """ Set up test fixtures
+
+        `chardet.detect` is replaced by a mock, and a mock file is
+        created whose `read` returns a unique object. Both mocks
+        record their calls in `self.mock_outfile`.
+
+        """
+        self.mock_outfile = StringIO()
+
+        mock(
+            'chardet.detect',
+            outfile=self.mock_outfile)
+
+        self.in_data = object()
+        self.in_file = Mock("file", outfile=self.mock_outfile)
+        self.in_file.read.mock_returns = self.in_data
+
+    def tearDown(self):
+        """ Tear down test fixtures
+
+        Puts back the real `chardet.detect`, so the mock installed
+        by `setUp` does not stay in place for tests that run later.
+
+        """
+        mock_restore()
+
+    def test_reads_file_contents(self):
+        """ detect_encoding should read contents of file """
+        # The margin of the expected text is removed by
+        # textwrap.dedent inside failUnlessOutputCheckerMatch.
+        expect_mock_output = """\
+ Called file.read()
+ ...
+ """ % vars()
+        detect_encoding.detect_encoding(self.in_file)
+        self.failUnlessOutputCheckerMatch(
+            expect_mock_output, self.mock_outfile.getvalue()
+            )
+
+    def test_uses_chardet_detect(self):
+        """ detect_encoding should use chardet.detect() """
+        # vars(self) supplies `in_data`, the object the mock file's
+        # read() returns.
+        expect_mock_output = """\
+ ...
+ Called chardet.detect(%(in_data)r)
+ """ % vars(self)
+        detect_encoding.detect_encoding(self.in_file)
+        self.failUnlessOutputCheckerMatch(
+            expect_mock_output, self.mock_outfile.getvalue()
+            )
+
+    def test_returns_expected_parameters(self):
+        """ detect_encoding should return expected encoding parameters """
+        expect_encoding_params = object()
+        chardet.detect.mock_returns = expect_encoding_params
+        encoding_params = detect_encoding.detect_encoding(self.in_file)
+        self.failUnlessIs(expect_encoding_params, encoding_params)
+
+
+class Test_report_file_encoding(TestCase):
+    """ Test cases for ``report_file_encoding`` function """
+
+    def setUp(self):
+        """ Set up test fixtures """
+        self.mock_outfile = StringIO()
+        setup_file_encoding_fixture(self)
+
+    def _get_report(self, params):
+        """ Return the report text for one set of file parameters """
+        return detect_encoding.report_file_encoding(
+            in_file=params['instance'],
+            encoding_params=params['encoding_params'],
+            )
+
+    def test_report_contains_filename(self):
+        """ report_file_encoding() result should contain filename
+
+        The name of the file passed in has to appear in the report
+        text that comes back.
+
+        """
+        for params in self.valid_file_params.values():
+            report = self._get_report(params)
+            self.failUnlessIn(report, params['instance'].name)
+
+    def test_report_contains_encoding(self):
+        """ report_file_encoding() result should contain encoding
+
+        The name of the detected encoding has to appear in the
+        report text that comes back.
+
+        """
+        for params in self.valid_file_params.values():
+            expect_name = params['encoding_params']['encoding']
+            report = self._get_report(params)
+            self.failUnlessIn(report, expect_name)
+
+    def test_report_contains_confidence_score(self):
+        """ report_file_encoding() result should contain confidence score
+
+        The confidence score of the detection, shown to two decimal
+        places, has to appear in the report text that comes back.
+
+        """
+        for params in self.valid_file_params.values():
+            score = params['encoding_params']['confidence']
+            expect_text = "%(confidence)0.2f" % dict(confidence=score)
+            report = self._get_report(params)
+            self.failUnlessIn(report, expect_text)
+
+class Test_process_file(TestCase):
+ """ Test cases for process_file function
+
+ Every collaborator of process_file that these tests observe
+ (detect_encoding, report_file_encoding and sys.stdout) is replaced
+ by a mock that logs its calls to `self.mock_outfile`; the tests
+ then compare that log against an expected transcript.
+
+ """
+
+ def setUp(self):
+ """ Set up test fixtures """
+
+ # Single sink that collects the call log of every mock below.
+ self.mock_outfile = StringIO()
+
+ self.app_class = detect_encoding.DetectEncodingApp
+ setup_DetectEncodingApp_fixture(self)
+ app_params = self.valid_app_params['no files']
+ self.app_instance = app_params['instance']
+
+ mock(
+ 'detect_encoding.detect_encoding',
+ outfile=self.mock_outfile)
+ # Successive calls return the encoding parameters of successive
+ # fixture files, in valid_file_params.values() order -- the same
+ # order in which the tests below iterate.
+ detect_encoding.detect_encoding.mock_returns_iter = (
+ f['encoding_params']
+ for f in self.valid_file_params.values()
+ )
+ mock(
+ 'detect_encoding.report_file_encoding',
+ outfile=self.mock_outfile)
+ # str(object()) gives an arbitrary placeholder for the report text.
+ fake_report_text = str(object())
+ detect_encoding.report_file_encoding.mock_returns = (
+ fake_report_text)
+ mock(
+ 'sys.stdout',
+ outfile=self.mock_outfile)
+
+ def tearDown(self):
+ """ Tear down test fixtures """
+ # presumably undoes every mock() patch applied in setUp -- confirm
+ # against the import of mock_restore
+ mock_restore()
+
+ def test_uses_detect_encoding_on_file(self):
+ """ Should call detect_encoding with file parameter """
+ for file_params in self.valid_file_params.values():
+ in_file = file_params['instance']
+ encoding_params = file_params['encoding_params']
+ # The trailing '...' line stands for whatever calls follow.
+ expect_mock_output = """\
+ Called detect_encoding.detect_encoding(
+ %(instance)r)
+ ...
+ """ % file_params
+ detect_encoding.process_file(in_file)
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+ # Discard the captured log before processing the next file.
+ self.mock_outfile.truncate(0)
+
+ def test_uses_report_file_encoding_on_parameters(self):
+ """ Should call report_file_encoding with report parameters """
+ for file_params in self.valid_file_params.values():
+ in_file = file_params['instance']
+ # Expects the file and the encoding parameters from the same
+ # fixture entry; this relies on the mocked detect_encoding
+ # yielding its results in this loop's iteration order.
+ expect_mock_output = """\
+ ...
+ Called detect_encoding.report_file_encoding(
+ %(instance)r,
+ %(encoding_params)r)
+ ...
+ """ % file_params
+ detect_encoding.process_file(in_file)
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+ # Discard the captured log before processing the next file.
+ self.mock_outfile.truncate(0)
+
+ def test_writes_report_to_stdout(self):
+ """ Should write report for file to stdout """
+ for file_params in self.valid_file_params.values():
+ # A new placeholder report text is installed for each file.
+ fake_report_text = str(object())
+ detect_encoding.report_file_encoding.mock_returns = (
+ fake_report_text)
+ in_file = file_params['instance']
+ # vars() supplies fake_report_text to the template below.
+ expect_mock_output = """\
+ ...
+ Called sys.stdout.write('...%(fake_report_text)s...')
+ """ % vars()
+ detect_encoding.process_file(in_file)
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+ # Discard the captured log before processing the next file.
+ self.mock_outfile.truncate(0)
+
+
+def setup_DetectEncodingApp_fixture(testcase):
+    """ Set up a DetectEncodingApp test fixture on the test case
+
+        `testcase`
+            The unit test case to which the fixture should be applied.
+            Must have an attribute `app_class` which is the type to
+            instantiate for the application instances.
+
+        The fixture is applied as the `valid_app_params` attribute, and
+        is a dict of parameter dicts to use in the test case. Each
+        parameter dict starts out with `file_names` and gains:
+
+        `in_files`
+            One fake input file (a named `StringIO`) per file name, or
+            a single fake file named "<stdin>" when there are no file
+            names.
+        `argv`
+            The command name followed by the file names.
+        `args`
+            The keyword arguments used to construct the application.
+        `instance`
+            The `app_class` instance constructed from `args`.
+
+        """
+
+    setup_file_encoding_fixture(testcase)
+
+    testcase.valid_app_params = {
+        'no files': dict(
+            file_names=[],
+            ),
+        'one file': dict(
+            file_names=[
+                "foo.txt",
+                ],
+            ),
+        'three files': dict(
+            file_names=[
+                "foo.txt", "bar", "baz.dat",
+                ],
+            ),
+        }
+
+    for params in testcase.valid_app_params.values():
+        file_names = params['file_names']
+        # Copy, so a 'cmd_args' list supplied in params is never mutated.
+        cmd_args = list(params.get('cmd_args', ["foo_prog"]))
+
+        in_files = []
+        for file_name in file_names:
+            in_file = StringIO()
+            in_file.name = file_name
+            in_files.append(in_file)
+        # Fall back to a fake stdin only when no files were named. An
+        # `else` clause on the `for` loop would not do this: it runs
+        # whenever the loop finishes without `break`, i.e. every time,
+        # and would discard the per-file fakes built above.
+        if not in_files:
+            in_file = StringIO()
+            in_file.name = "<stdin>"
+            in_files = [in_file]
+        params['in_files'] = in_files
+
+        argv = cmd_args + list(file_names)
+        params['argv'] = argv
+        args = dict(
+            argv=argv
+            )
+        params['args'] = args
+        params['instance'] = testcase.app_class(**args)
+
+
+class Test_DetectEncodingApp_init(TestCase):
+ """ Test cases for DetectEncodingApp class initialisation """
+
+ def setUp(self):
+ """ Set up test fixtures """
+
+ # Sink that collects the call log of the mocks used by the tests.
+ self.mock_outfile = StringIO()
+
+ self.app_class = detect_encoding.DetectEncodingApp
+ setup_DetectEncodingApp_fixture(self)
+
+ def tearDown(self):
+ """ Tear down test fixtures """
+ # presumably undoes every mock() patch applied by the tests --
+ # confirm against the import of mock_restore
+ mock_restore()
+
+ def test_requires_argv(self):
+ """ Should require argv parameter """
+ # Constructing the application with no arguments must raise
+ # TypeError.
+ args = dict()
+ self.failUnlessRaises(TypeError, self.app_class, **args)
+
+ def test_parses_args(self):
+ """ Should parse command-line arguments """
+ for params in self.valid_app_params.values():
+ args = params['args']
+ argv = args['argv']
+
+ # Everything after the command name is what should be parsed;
+ # the stub parser hands the same list back as the remaining
+ # arguments.
+ argv_to_parse = argv[1:]
+ args_return = argv_to_parse
+ mock_option_parser = Mock(
+ "OptionParser",
+ outfile=self.mock_outfile)
+ # parse_args returns a 2-tuple; the first item is an opaque
+ # placeholder object.
+ stub_parse_args_return = (object(), args_return)
+ mock_option_parser.parse_args.mock_returns = (
+ stub_parse_args_return)
+ # Calling detect_encoding.OptionParser() now yields the mock
+ # parser built above.
+ mock(
+ 'detect_encoding.OptionParser',
+ returns=mock_option_parser,
+ outfile=self.mock_outfile)
+
+ expect_mock_output = """\
+ Called detect_encoding.OptionParser()
+ Called OptionParser.parse_args(%(argv_to_parse)r)
+ """ % vars()
+ # The instance is created only for the calls it makes on the
+ # mocks; it is not otherwise inspected.
+ instance = self.app_class(**args)
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue())
+ # Discard the captured log before the next parameter set.
+ self.mock_outfile.truncate(0)
+
+ def test_stores_specified_args(self):
+ """ Should store specified command-line arguments """
+ for params in self.valid_app_params.values():
+ instance = params['instance']
+ app_args = params['args']
+ # The fixture's instance must expose the file names it was
+ # given as its `file_names` attribute.
+ expect_file_names = params['file_names']
+ self.failUnlessEqual(expect_file_names, instance.file_names)
+
+
+class Test_DetectEncodingApp_main(TestCase):
+ """ Test cases for DetectEncodingApp.main method
+
+ `detect_encoding.process_file` and `__builtin__.open` are mocked
+ in `setUp`, logging their calls to `self.mock_outfile`; each test
+ runs `main()` on a fixture instance and compares the log against
+ an expected transcript.
+
+ """
+
+ def setUp(self):
+ """ Set up test fixtures """
+
+ # Sink that collects the call log of every mock.
+ self.mock_outfile = StringIO()
+
+ self.app_class = detect_encoding.DetectEncodingApp
+ setup_DetectEncodingApp_fixture(self)
+
+ mock(
+ 'detect_encoding.process_file',
+ outfile=self.mock_outfile)
+ # By default every open() call returns the 'foo.txt' fixture file.
+ fake_file = self.valid_file_params['foo.txt']['instance']
+ mock(
+ '__builtin__.open',
+ returns=fake_file,
+ outfile=self.mock_outfile)
+
+ def tearDown(self):
+ """ Tear down test fixtures """
+ # presumably undoes every mock() patch still in place -- confirm
+ # against the import of mock_restore
+ mock_restore()
+
+ def test_with_no_files_processes_stdin(self):
+ """ With no files specified, should process sys.stdin """
+ params = self.valid_app_params['no files']
+ instance = params['instance']
+ # Replace sys.stdin with a mock so its repr can be matched in the
+ # expected process_file call.
+ in_file = Mock("sys.stdin", outfile=self.mock_outfile)
+ mock(
+ 'sys.stdin', mock_obj=in_file)
+ expect_mock_output = """\
+ Called detect_encoding.process_file(%(in_file)r)
+ """ % vars()
+ instance.main()
+ # mock_restore() is called before the assertion runs.
+ mock_restore()
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+
+ def test_processes_specified_files(self):
+ """ With filenames, should process each file in turn """
+ params = self.valid_app_params['three files']
+ file_names = params['file_names']
+ instance = params['instance']
+ in_files = []
+ expect_mock_output_segments = []
+ # For each file name, expect an open() of that name followed by a
+ # process_file() call on the matching fixture file.
+ for file_name in file_names:
+ in_file = self.valid_file_params[file_name]['instance']
+ in_files.append(in_file)
+ expect_mock_output_segments.append(
+ ("""\
+ Called __builtin__.open(%(file_name)r)
+ Called detect_encoding.process_file(%(in_file)r)"""
+ ) % vars()
+ )
+ # Re-mock open() so successive calls return the fixture files in
+ # file-name order.
+ mock(
+ '__builtin__.open', returns_iter=in_files,
+ outfile=self.mock_outfile)
+ expect_mock_output = "\n".join(expect_mock_output_segments) + "\n"
+ instance.main()
+ mock_restore()
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+
+ def test_emits_message_on_open_ioerror(self):
+ """ IOError from open should cause error message """
+ params = self.valid_app_params['one file']
+ instance = params['instance']
+ error_instance = IOError("Badness!")
+ error_name = error_instance.__class__.__name__
+ # Make the open() mock installed in setUp raise the error.
+ __builtin__.open.mock_raises = error_instance
+ mock(
+ 'sys.stderr',
+ outfile=self.mock_outfile)
+ # The message written to stderr must mention the exception class
+ # name and then the exception text.
+ expect_mock_output = """\
+ ...
+ Called sys.stderr.write('...%(error_name)s...%(error_instance)s...')
+ """ % vars()
+ instance.main()
+ mock_restore()
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+
+ def test_continues_to_next_file_after_ioerror(self):
+ """ Should proceed to next file after IOError """
+ params = self.valid_app_params['three files']
+ instance = params['instance']
+ file_names = params['file_names']
+ error_instance = IOError("Badness!")
+ mock(
+ 'sys.stderr',
+ outfile=self.mock_outfile)
+ # Only the second of the three files fails to open.
+ error_file_name = file_names[1]
+ def stub_open(file_name, *args, **kwargs):
+ if file_name == error_file_name:
+ raise error_instance
+ else:
+ return Mock("file", outfile=self.mock_outfile)
+ mock(
+ '__builtin__.open', mock_obj=stub_open,
+ outfile=self.mock_outfile)
+ # The expected transcript lists no open() calls: one processed
+ # file, one error message, then the remaining processed file.
+ # NOTE(review): the template has no format specifiers, so the
+ # `% vars()` below interpolates nothing.
+ expect_mock_output = """\
+ Called detect_encoding.process_file(...)
+ Called sys.stderr.write(...)
+ Called detect_encoding.process_file(...)
+ """ % vars()
+ instance.main()
+ mock_restore()
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+
+ def test_emits_message_on_detect_encoding_ioerror(self):
+ """ IOError from detect_encoding should cause error message """
+ params = self.valid_app_params['one file']
+ instance = params['instance']
+ error_instance = IOError("Badness!")
+ error_name = error_instance.__class__.__name__
+ # The error is injected through the mocked process_file, which is
+ # the call main() makes for each file.
+ detect_encoding.process_file.mock_raises = error_instance
+ mock(
+ 'sys.stderr',
+ outfile=self.mock_outfile)
+ # The message written to stderr must mention the exception class
+ # name and then the exception text.
+ expect_mock_output = """\
+ ...
+ Called sys.stderr.write('...%(error_name)s...%(error_instance)s...')
+ """ % vars()
+ instance.main()
+ mock_restore()
+ self.failUnlessOutputCheckerMatch(
+ expect_mock_output, self.mock_outfile.getvalue()
+ )
+
+
+class Test_ProgramMain(scaffold.Test_ProgramMain):
+    """ Test cases for the program's __main__ function """
+
+    def setUp(self):
+        """ Name the module and application class, then run base set-up """
+        program = detect_encoding
+        self.program_module = program
+        self.application_class = program.DetectEncodingApp
+        # Both attributes are assigned before the scaffold's own setUp
+        # is invoked.
+        super(Test_ProgramMain, self).setUp()
signature.asc
Description: Digital signature

