Below is a new pylint checker that checks string formatting operations (e.g.
"%s %s" % ( x, y )).  It will find many different types of errors, for
example in the following file:

x = y = z = 1
print "%(x)s %s" % ( x, y )
print "%s %s %s" % ( x, y )
print "%s %s" % ( x, y, z )
print "%(x)s %(y)s %(z)s" % { 'x' : 1, 'y' : 2 }
print "%(x)s %(y)s" % { 'x' : 1, 'y' : 2, 'z' : 3 }
print "%(x)s %(y)s" % { 'x' : 1, 'y' : 2, 3 : 3 }
print "%(x)s %(y)s" % ( x, y )
print "%s %S" % ( x, y )
print "%s %" % ( x, y )

it will report:

************* Module pylint-stringformat
E9902:  2: Mixing named and unnamed conversion specifiers in format string
E9906:  3: Not enough arguments for format string
E9905:  4: Too many arguments for format string
E9904:  5: Missing key 'z' in format string dictionary
W9901:  6: Unused key 'z' in format string dictionary
W9900:  7: Format string dictionary key should be a string, not 3
E9903:  8: Expected mapping for format string, not Tuple
E9900:  9: Unsupported format character 'S' (0x53) at index 4
E9901: 10: Format string ends in middle of conversion specifier

Please consider this checker for inclusion in pylint.  If you are interested
in doing so, I'd be interested in any feedback you have, and I'd be happy to
develop some unit tests that work with the pylint test framework.

Thanks,
James.

====================================

# Copyright (c) 2009 Arista Networks, Inc.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
"""Checker for string formatting operations.
"""

import string
from logilab import astng
from pylint.interfaces import IASTNGChecker
from pylint.checkers import BaseChecker

# Message table for the string format checker.  Each entry maps a message id
# to a (message template, help text) pair.  The help texts are built from
# adjacent string literals rather than backslash continuations so that the
# source indentation does not end up embedded in the text shown to the user.
MSGS = {
    'E9900': ("Unsupported format character %r (%#02x) at index %d",
              "Used when an unsupported format character is used in a "
              "format string."),
    'E9901': ("Format string ends in middle of conversion specifier",
              "Used when a format string terminates before the end of a "
              "conversion specifier."),
    'E9902': ("Mixing named and unnamed conversion specifiers in format string",
              "Used when a format string contains both named (e.g. "
              "'%(foo)d') and unnamed (e.g. '%d') conversion specifiers. "
              "This is also used when a named conversion specifier contains "
              "* for the minimum field width and/or precision."),
    'E9903': ("Expected mapping for format string, not %s",
              "Used when a format string that uses named conversion "
              "specifiers is used with an argument that is not a mapping."),
    'W9900': ("Format string dictionary key should be a string, not %s",
              "Used when a format string that uses named conversion "
              "specifiers is used with a dictionary whose keys are not all "
              "strings."),
    'W9901': ("Unused key %r in format string dictionary",
              "Used when a format string that uses named conversion "
              "specifiers is used with a dictionary that contains keys not "
              "required by the format string."),
    'E9904': ("Missing key %r in format string dictionary",
              "Used when a format string that uses named conversion "
              "specifiers is used with a dictionary that doesn't contain all "
              "the keys required by the format string."),
    # The help texts of E9905/E9906 must match their messages: E9905 is the
    # "too many" case and E9906 the "too few" case.
    'E9905': ("Too many arguments for format string",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too many arguments."),
    'E9906': ("Not enough arguments for format string",
              "Used when a format string that uses unnamed conversion "
              "specifiers is given too few arguments."),
    }

class IncompleteFormatStringException(Exception):
    """Signals that a format string stopped partway through a conversion
    specifier, i.e. the end of the string was reached while one was still
    being parsed."""

class UnsupportedFormatCharacterException(Exception):
    """Signals that a conversion specifier uses a format character that is
    not supported.

    The offending position is available both as the sole exception argument
    and as the ``index`` attribute.
    """

    def __init__(self, index):
        super(UnsupportedFormatCharacterException, self).__init__(index)
        # Offset of the unsupported character within the format string.
        self.index = index

def parseFormatString(formatString):
    """Parse a printf-style format string.

    Returns a tuple (keys, numArgs).  keys is the set of mapping keys used by
    named conversion specifiers (e.g. 'x' for '%(x)s').  numArgs is the number
    of positional arguments the format string consumes: one for every unnamed
    conversion specifier and one for every '*' used as a minimum field width
    or precision.  A literal '%%' consumes no argument.

    Raises IncompleteFormatStringException if the string ends in the middle of
    a conversion specifier, and UnsupportedFormatCharacterException (carrying
    the index of the offending character) if the conversion type is not one of
    the accepted characters.
    """
    keys = set()
    numArgs = 0
    end = len(formatString)

    def nextChar(i):
        # Step to the next character.  Running off the end here means a
        # conversion specifier was started but never completed.
        i += 1
        if i == end:
            raise IncompleteFormatStringException
        return (i, formatString[i])

    i = 0
    while i < end:
        c = formatString[i]
        if c == '%':
            i, c = nextChar(i)

            # Parse the mapping key (optional).  Parentheses may nest inside
            # the key, so track the depth to find the matching ')'.
            key = None
            if c == '(':
                depth = 1
                i, c = nextChar(i)
                keyStart = i
                while depth != 0:
                    if c == '(':
                        depth += 1
                    elif c == ')':
                        depth -= 1
                    i, c = nextChar(i)
                # i is now one past the closing ')'.
                keyEnd = i - 1
                key = formatString[keyStart:keyEnd]

            # Parse the conversion flags (optional).
            while c in '#0- +':
                i, c = nextChar(i)

            # Parse the minimum field width (optional).  A '*' takes the
            # width from the argument tuple, consuming one argument.
            if c == '*':
                numArgs += 1
                i, c = nextChar(i)
            else:
                while c in string.digits:
                    i, c = nextChar(i)

            # Parse the precision (optional); '*' again consumes an argument.
            if c == '.':
                i, c = nextChar(i)
                if c == '*':
                    numArgs += 1
                    i, c = nextChar(i)
                else:
                    while c in string.digits:
                        i, c = nextChar(i)

            # Parse the length modifier (optional).
            if c in 'hlL':
                i, c = nextChar(i)

            # Parse the conversion type (mandatory).
            if c not in 'diouxXeEfFgGcrs%':
                raise UnsupportedFormatCharacterException(i)

            # Compare against None rather than testing truthiness: the empty
            # mapping key in '%()s' is a named specifier, not a positional
            # one.
            if key is not None:
                keys.add(key)
            elif c != '%':
                numArgs += 1

        i += 1

    return keys, numArgs

class StringFormatChecker(BaseChecker):
    """Checks string formatting operations to ensure that the format string is
    valid and the arguments match the format string.
    """

    __implements__ = (IASTNGChecker,)

    name = 'stringformat'
    msgs = MSGS

    def visit_binop(self, node):
        """Check a binary operation node of the form <string constant> % args.

        Nodes whose operator is not '%' or whose left operand is not a
        constant string are ignored.  Parse errors in the format string are
        reported as E9900/E9901; otherwise the right operand is checked
        against what the format string requires.
        """
        if node.op != '%':
            return

        f = node.left
        args = node.right

        if not (isinstance(f, astng.Const) and isinstance(f.value, basestring)):
            return
        formatString = f.value

        try:
            requiredKeys, requiredNumArgs = parseFormatString(formatString)
        except UnsupportedFormatCharacterException as e:
            c = formatString[e.index]
            self.add_message('E9900', node=node, args=(c, ord(c), e.index))
            return
        except IncompleteFormatStringException:
            self.add_message('E9901', node=node)
            return

        if requiredKeys and requiredNumArgs:
            # The format string uses both named and unnamed format
            # specifiers.
            self.add_message('E9902', node=node)
        elif requiredKeys:
            self._checkNamedArgs(node, args, requiredKeys)
        else:
            self._checkUnnamedArgs(node, args, requiredNumArgs)

    def _checkNamedArgs(self, node, args, requiredKeys):
        """Check the RHS of a format string that uses only named specifiers.

        The RHS should be a mapping object that contains precisely the set of
        keys required by the format string.
        """
        if isinstance(args, astng.Dict):
            keys = set()
            unknownKeys = False

            for k, _ in args.items:
                if not isinstance(k, astng.Const):
                    # One of the keys was something other than a constant.
                    # Since we can't tell what it is, suppress checks for
                    # missing keys in the dictionary.
                    unknownKeys = True
                    continue
                key = k.value
                if isinstance(key, basestring):
                    keys.add(key)
                else:
                    self.add_message('W9900', node=node, args=key)

            if not unknownKeys:
                for key in requiredKeys:
                    if key not in keys:
                        self.add_message('E9904', node=node, args=key)
            for key in keys:
                if key not in requiredKeys:
                    self.add_message('W9901', node=node, args=key)

        elif isinstance(args, (astng.Const, astng.Tuple, astng.List,
                               astng.ListComp, astng.GenExpr,
                               astng.Backquote, astng.Lambda)):
            # These node types are reported as "not a mapping".
            self.add_message('E9903', node=node, args=type(args).__name__)

        # Otherwise the RHS of the format specifier is a name or expression.
        # It may be a mapping object, so there's nothing we can check.

    def _checkUnnamedArgs(self, node, args, requiredNumArgs):
        """Check the RHS of a format string that uses only unnamed specifiers.

        The number of arguments passed on the RHS of the % operator should
        match the number required by the format string.
        """
        if isinstance(args, astng.Tuple):
            numArgs = len(args.elts)
        elif isinstance(args, (astng.Const, astng.Dict, astng.List,
                               astng.ListComp, astng.GenExpr,
                               astng.Backquote, astng.Lambda)):
            # Any of these non-tuple node types counts as one argument.
            numArgs = 1
        else:
            # The RHS of the format specifier is a name or expression.  It
            # could be a tuple of unknown size, so there's nothing we can
            # check.
            return

        if numArgs > requiredNumArgs:
            self.add_message('E9905', node=node)
        elif numArgs < requiredNumArgs:
            self.add_message('E9906', node=node)

def register(linter):
    """Create a StringFormatChecker bound to the given linter and register
    it with that linter."""
    checker = StringFormatChecker(linter)
    linter.register_checker(checker)
_______________________________________________
Python-Projects mailing list
Python-Projects@lists.logilab.org
http://lists.logilab.org/mailman/listinfo/python-projects

Reply via email to