Dalba has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/398420 )

Change subject: [draft] dff_checker.py: Add u'' prefix detection for added 
lines Change-Id: I4f0aec3b117b7abd1f1a4610ce074de7dadb6508
......................................................................

[draft] dff_checker.py: Add u'' prefix detection for added lines
Change-Id: I4f0aec3b117b7abd1f1a4610ce074de7dadb6508
---
M scripts/maintenance/diff_checker.py
1 file changed, 53 insertions(+), 23 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/20/398420/1

diff --git a/scripts/maintenance/diff_checker.py 
b/scripts/maintenance/diff_checker.py
index 678b808..df1dd12 100644
--- a/scripts/maintenance/diff_checker.py
+++ b/scripts/maintenance/diff_checker.py
@@ -2,17 +2,20 @@
 # -*- coding: utf-8 -*-
 """Check that the latest commit diff follows the guidelines.
 
-Currently the script only checks line lengths. Lines longer than 79 characters
-should be avoided.
+Currently the script checks the following code style issues:
+    - Line lengths: Newly added lines longer than 79 chars are not allowed.
+    - Unicode literal notations: They are not allowed since we instead import
+        unicode_literals from __future__.
+
 See [[Manual:Pywikibot/Development/Guidelines]] for more info.
 
 Todo: The following rules can be added in the future:
     - For any changes or new lines use single quotes for strings, or double
         quotes  if the string contains a single quote. But keep older code
         unchanged.
-    - Do not use a u'' prefix on strings, as it is meaningless due to
-        __future__.unicode_literals. But keep older code unchanged.
-
+    - Prefer string.format() instead of modulo operator % for string
+        formatting. The modulo operator might be deprecated by a future
+        python release.
 """
 #
 # (C) Pywikibot team, 2017
@@ -22,13 +25,20 @@
 
 from __future__ import absolute_import, unicode_literals
 
-from re import compile as re_compile
+from re import compile as re_compile, IGNORECASE
 from subprocess import check_output
 
 from unidiff import PatchSet
 
 
-IGNORABLE_LONG_LINE = re_compile(r'^\s*(# )?<?https?://\S+>?$').search
+# Regexp for a line that is allowed to be longer than the limit.
+# It does not make sense to break long URLs.
+# 
https://github.com/PyCQA/pylint/blob/d42e74bb9428f895f36808d30bd8a1fe31e28f63/pylintrc#L145
+IGNORABLE_LONG_LINE = re_compile(r'\s*(# )?<?https?://\S+>?$').match
+# The U_PREFIX regex cannot be 100% accurate without tokenizing the whole
+# module, but thee accuracy of this simple regex should be enough for our
+# purposes.
+U_PREFIX = re_compile(r'(.*?)(?=\bur?[\'"])(?<![\'"])', IGNORECASE).match
 
 
 def get_latest_patchset():
@@ -39,37 +49,57 @@
     return PatchSet.from_string(output)
 
 
-def check_line_lengths(latest_patchset):
-    """Check that none of the added/modified lines are longer than 79 chars.
+def print_error(file, line_no, col_no, error):
+    """Print the error."""
+    print(file + ':' + str(line_no) + ':' + str(col_no) + ': ' + error)
 
-    @return: True if line lengths are OK. False otherwise.
+
+def check(latest_patchset):
+    """Check that the added/modified lines do not violate the guideline.
+
+    @return: True if line added/modified OK. False otherwise.
     @rtype: bool
     """
-    found_long = False
+    error = False
     for patched_file in latest_patchset:
         target_file = patched_file.target_file
         if not (target_file.endswith('.py') or patched_file.is_removed_file):
             continue
         for hunk in patched_file:
             for line in hunk:
-                if (
-                    line.is_added and len(line.value) > 80
-                    and not IGNORABLE_LONG_LINE(line.value)
-                ):
-                    print(
-                        target_file + ':' + str(line.target_line_no) + ':'
-                        + str(len(line.value))
-                        + ': line is too long (more than 79 characters)'
+                if not line.is_added:
+                    continue
+                line_val = line.value
+                # Check line length
+                if len(line_val) > 80 and not IGNORABLE_LONG_LINE(line_val):
+                    print_error(
+                        target_file, line.target_line_no, len(line_val),
+                        'newly-added/modified line longer than 79 characters',
                     )
-                    found_long = True
-    return not found_long
+                    error = True
+                # Check that u-prefix is not used
+                u_match = U_PREFIX(line_val)
+                if u_match:
+                    print_error(
+                        target_file, line.target_line_no, u_match.end() + 1,
+                        'newly-added/modified line with u"" prefixed string '
+                        'literal',
+                    )
+                    error = True
+    return not error
 
 
 def main():
     """Run this script."""
     latest_patchset = get_latest_patchset()
-    if not check_line_lengths(latest_patchset):
-        raise SystemExit('diff-checker failed')
+    if not check(latest_patchset):
+        raise SystemExit(
+            'diff-checker failed.\n'
+            'Please review '
+            '<https://www.mediawiki.org/wiki/'
+            'Manual:Pywikibot/Development/Guidelines#Miscellaneous> '
+            'and update your patch-set accordingly.'
+        )
 
 
 if __name__ == '__main__':

-- 
To view, visit https://gerrit.wikimedia.org/r/398420
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4f0aec3b117b7abd1f1a4610ce074de7dadb6508
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Dalba <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to