Hi,

Following on from my patch to fix an incorrect warning for \0, this one adds a new warning for octal character escapes in a string. A typical problematic example is:
re.sub('\w+,\w+', '\1 \2', a) and a bit more motivation is that pep-3127<http://www.python.org/dev/peps/pep-3127/> deprecates the 0-prefixed syntax for integers (not for character escapes.) Of course this is more of a warning about something that might be unintentional, and people might reasonably choose to use them, for something like '\033' (escape). But, it's easy to turn it off or rephrase that as '\x1b'. -- Martin --- pylint/checkers/format.py 2013-01-02 16:00:12.000000000 +1100 +++ pylint/checkers/format.py 2013-01-08 11:43:44.000000000 +1100 @@ -385,16 +385,18 @@ 'anomalous-unicode-escape-in-string', 'Used when an escape like \\u is encountered in a byte ' 'string where it has no effect.'), + 'W1403': ('Octal escape \'%s\'.', + 'octal-escape', + 'Octal escape sequences (other than \\0) are rarely used ' + 'and may be unintentional. For a backreference in a regexp, ' + 'use a raw string.'), } name = 'string_constant' __implements__ = (IRawChecker, IASTNGChecker) # Characters that have a special meaning after a backslash in either # Unicode or byte strings. - ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567' - - # TODO(mbp): Octal characters are quite an edge case today; people may - # prefer a separate warning where they occur. \0 should be allowed. + ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"' # Characters that have a special meaning after a backslash but only in # Unicode strings. @@ -455,7 +457,9 @@ # of the string would be a SyntaxError. next_char = string_body[i+1] match = string_body[i:i+2] - if next_char in self.UNICODE_ESCAPE_CHARACTERS: + if next_char in '01234567': + self._check_octal(start_row, string_body[i+1:i+4]) + elif next_char in self.UNICODE_ESCAPE_CHARACTERS: if 'u' in prefix: pass elif _PY3K and 'b' not in prefix: @@ -469,6 +473,28 @@ # character can never be the start of a new backslash escape. i += 2 + def _check_octal(self, start_row, escape_content): + """Warn about non-zero octal escapes. 
+ + Motivating example: re.sub('\\w+', 'Hello \\1!', name) + + start_row: integer line number in the source. + escape_contents: Up to three characters following the backslash, + starting with an octal digit. + """ + is_nonzero = False + for i_digit, ch in enumerate(escape_content): + if ch == '0': + pass + elif ch in '1234567': + is_nonzero = True + else: + # Non-numeric character is not part of the escape. + i_digit -= 1 + break + if is_nonzero: + self.add_message('W1403', line=start_row, + args=('\\' + escape_content[:i_digit + 1],)) def register(linter): """required method to auto register this checker """ --- pylint/test/input/func_excess_escapes.py 2013-01-02 16:00:12.000000000 +1100 +++ pylint/test/input/func_excess_escapes.py 2013-01-08 11:41:09.000000000 +1100 @@ -1,4 +1,4 @@ -# pylint:disable=W0105, W0511 +# pylint: disable=pointless-string-statement """Stray backslash escapes may be missing a raw-string prefix.""" __revision__ = '$Id$' @@ -12,8 +12,8 @@ NEWLINE = "\n" OLD_ESCAPES = '\a\b\f\n\t\r\v' HEX = '\xad\x0a\x0d' -FALSE_OCTAL = '\o123\o000' # Not octal in Python -OCTAL = '\123\000' +FALSE_OCTAL = '\o123\o000' # Not octal, even in py3k +OCTAL = '\123\000' # pylint: disable=octal-escape NOT_OCTAL = '\888\999' NUL = '\0' UNICODE = u'\u1234' --- /dev/null 2013-01-07 10:44:38.067492040 +1100 +++ pylint/test/input/func_octal.py 2013-01-02 16:56:38.000000000 +1100 @@ -0,0 +1,29 @@ +# pylint:disable=W0105, W0511 +"""Octal escapes are often unintentional, especially in a re substitution.""" + +__revision__ = '$Id$' + +OCTAL = '\177' +'\020' +'\001\7' +NUL = '\0\00\000' + +# Regexp backreferences are ok in a raw string. +r'Dear \1, please \2 \3' + +# Also caught in docstrings +"""Replaces \1 with \2""" + +# This is ok +"""Replaces \\1 with \\2""" + +# Nul, followed by digits that are not part of the escape. 
+"\000123" +"\00089" + +# Or followed by other characters +"\000abc" + +# Short octal sequence, with following non-octal digits +"\089" +"\009" --- /dev/null 2013-01-07 10:44:38.067492040 +1100 +++ pylint/test/messages/func_octal.txt 2012-12-24 17:40:15.000000000 +1100 @@ -0,0 +1,7 @@ +W: 6: Octal escape '\177'. +W: 7: Octal escape '\020'. +W: 8: Octal escape '\001'. +W: 8: Octal escape '\7'. +W: 15: Octal escape '\1'. +W: 15: Octal escape '\2'. +
_______________________________________________ Python-Projects mailing list Python-Projects@lists.logilab.org http://lists.logilab.org/mailman/listinfo/python-projects