John Vandenberg has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/189914

Change subject: Fix anomalous escape (\)
......................................................................

Fix anomalous escape (\)

Change-Id: I8bcea1d5db3b5d0c2c9ada19382ada01f7339044
---
M pywikibot/__init__.py
M pywikibot/botirc.py
M pywikibot/i18n.py
M pywikibot/page.py
M pywikibot/pagegenerators.py
M pywikibot/textlib.py
M scripts/add_text.py
M scripts/archivebot.py
M scripts/casechecker.py
M scripts/category.py
M scripts/checkimages.py
M scripts/commonscat.py
M scripts/cosmetic_changes.py
M scripts/image.py
M scripts/imagerecat.py
M scripts/interwiki.py
M scripts/maintenance/compat2core.py
M scripts/reflinks.py
M scripts/replace.py
M scripts/revertbot.py
M scripts/script_wui.py
M scripts/selflink.py
M scripts/solve_disambiguation.py
M scripts/template.py
M scripts/weblinkchecker.py
25 files changed, 121 insertions(+), 112 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/14/189914/1

diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py
index b93f53f..b7161d8 100644
--- a/pywikibot/__init__.py
+++ b/pywikibot/__init__.py
@@ -398,7 +398,7 @@
     @classmethod
     def fromTimestr(cls, datetimestr, precision=14, before=0, after=0,
                     timezone=0, calendarmodel=None, site=None):
-        match = re.match('([-+]?\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z',
+        match = re.match(r'([-+]?\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z',
                          datetimestr)
         if not match:
             raise ValueError(u"Invalid format: '%s'" % datetimestr)
diff --git a/pywikibot/botirc.py b/pywikibot/botirc.py
index ce5e1d3..5c9fd54 100644
--- a/pywikibot/botirc.py
+++ b/pywikibot/botirc.py
@@ -50,7 +50,7 @@
         self.channel = channel
         self.site = site
         self.other_ns = re.compile(
-            u'14\[\[07(' + u'|'.join([item[0] for item in
+            u'14\\[\\[07(' + u'|'.join([item[0] for item in
                                         list(site.namespaces().values()) if 
item[0]]) + u')')
         self.api_url = self.site.apipath()
         self.api_url += 
'?action=query&meta=siteinfo&siprop=statistics&format=xml'
diff --git a/pywikibot/i18n.py b/pywikibot/i18n.py
index bd9b418..f0bdb23 100644
--- a/pywikibot/i18n.py
+++ b/pywikibot/i18n.py
@@ -28,7 +28,7 @@
 if sys.version_info[0] > 2:
     basestring = (str, )
 
-PLURAL_PATTERN = '{{PLURAL:(?:%\()?([^\)]*?)(?:\)d)?\|(.*?)}}'
+PLURAL_PATTERN = r'{{PLURAL:(?:%\()?([^\)]*?)(?:\)d)?\|(.*?)}}'
 
 # Package name for the translation messages
 messages_package_name = 'scripts.i18n'
diff --git a/pywikibot/page.py b/pywikibot/page.py
index ea260c7..4a65bef 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -3296,7 +3296,7 @@
         # to the namespace case=first-letter.
 
         # Validate the title is 'Q' and a positive integer.
-        if not re.match('^Q[1-9]\d*$', self._link.title):
+        if not re.match(r'^Q[1-9]\d*$', self._link.title):
             raise pywikibot.InvalidTitle(
                 u"'%s' is not a valid item page title"
                 % self._link.title)
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 1a73e8f..9a66176 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -730,7 +730,7 @@
             p = re.compile(r'(?<!\\),')  # Match "," only if there no "\" 
before
             temp = []  # Array to store split argument
             for arg in p.split(claim):
-                temp.append(arg.replace('\,', ',').split('='))
+                temp.append(arg.replace(r'\,', ',').split('='))
             self.claimfilter_list.append((temp[0][0], temp[0][1],
                                           dict(temp[1:]), ifnot))
             return True
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 145ea5f..aa40e10 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -32,7 +32,8 @@
 from pywikibot.tools import OrderedDict
 
 TEMP_REGEX = re.compile(
-    
'{{(?:msg:)?(?P<name>[^{\|]+?)(?:\|(?P<params>[^{]+?(?:{[^{]+?}[^{]*?)?))?}}')
+    
r'{{(?:msg:)?(?P<name>[^{\|]+?)(?:\|(?P<params>[^{]+?(?:{[^{]+?}[^{]*?)?))?}}')
+
 NON_LATIN_DIGITS = {
     'ckb': u'٠١٢٣٤٥٦٧٨٩',
     'fa': u'۰۱۲۳۴۵۶۷۸۹',
@@ -174,8 +175,8 @@
         marker1 = findmarker(text)
         marker2 = findmarker(text, u'##', u'#')
         Rvalue = re.compile('{{{.+?}}}')
-        Rmarker1 = re.compile('%(mark)s(\d+)%(mark)s' % {'mark': marker1})
-        Rmarker2 = re.compile('%(mark)s(\d+)%(mark)s' % {'mark': marker2})
+        Rmarker1 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark': marker1})
+        Rmarker2 = re.compile(r'%(mark)s(\d+)%(mark)s' % {'mark': marker2})
         # hide the flat template marker
         dontTouchRegexes.append(Rmarker1)
         origin = text
@@ -600,9 +601,9 @@
                     # Do we have a noinclude at the end of the template?
                     parts = s2.split(includeOff)
                     lastpart = parts[-1]
-                    if re.match('\s*%s' % marker, lastpart):
+                    if re.match(r'\s*%s' % marker, lastpart):
                         # Put the langlinks back into the noinclude's
-                        regexp = re.compile('%s\s*%s' % (includeOff, marker))
+                        regexp = re.compile(r'%s\s*%s' % (includeOff, marker))
                         newtext = regexp.sub(s + includeOff, s2)
                     else:
                         # Put the langlinks at the end, inside noinclude's
@@ -734,7 +735,7 @@
                          site=site)
     if marker:
         # avoid having multiple linefeeds at the end of the text
-        text = re.sub('\s*%s' % re.escape(marker), config.LS + marker,
+        text = re.sub(r'\s*%s' % re.escape(marker), config.LS + marker,
                       text.strip())
     return text.strip()
 
@@ -920,12 +921,12 @@
     # Note: While allowing dots inside URLs, MediaWiki will regard
     # dots at the end of the URL as not part of that URL.
     # The same applies to comma, colon and some other characters.
-    notAtEnd = '\]\s\.:;,<>"\|\)'
+    notAtEnd = r'\]\s\.:;,<>"\|\)'
     # So characters inside the URL can be anything except whitespace,
     # closing squared brackets, quotation marks, greater than and less
     # than, and the last character also can't be parenthesis or another
     # character disallowed by MediaWiki.
-    notInside = '\]\s<>"'
+    notInside = r'\]\s<>"'
     # The first half of this regular expression is required because '' is
     # not allowed inside links. For example, in this wiki text:
     #       ''Please see https://www.example.org.''
@@ -1216,7 +1217,7 @@
 
     """
     # match preceding colon for text links
-    section = re.sub(r'\\\[\\\[(\\:)?', '\[\[\:?', re.escape(section))
+    section = re.sub(r'\\\[\\\[(\\:)?', r'\[\[\:?', re.escape(section))
     # match underscores and white spaces
     section = re.sub(r'\\?[ _]', '[ _]', section)
     m = re.search("=+[ ']*%s[ ']*=+" % section, pagetext)
diff --git a/scripts/add_text.py b/scripts/add_text.py
index ae21071..c5d22aa 100644
--- a/scripts/add_text.py
+++ b/scripts/add_text.py
@@ -193,7 +193,7 @@
             allstars = []
             starstext = textlib.removeDisabledParts(text)
             for star in starsList:
-                regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
+                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                    % star, re.I)
                 found = regex.findall(starstext)
                 if found != []:
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 588e3be..993c454 100644
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -186,7 +186,7 @@
     'B' (bytes) or 'T' (threads).
 
     """
-    r = re.search('(\d+) *([BkKMT]?)', string)
+    r = re.search(r'(\d+) *([BkKMT]?)', string)
     val, unit = (int(r.group(1)), r.group(2))
     if unit == 'M':
         val *= 1024
diff --git a/scripts/casechecker.py b/scripts/casechecker.py
index e77e856..9061710 100644
--- a/scripts/casechecker.py
+++ b/scripts/casechecker.py
@@ -117,7 +117,7 @@
     latClrFnt = u'<font color=brown>'
     suffixClr = u'</font>'
 
-    wordBreaker = re.compile(u'[ _\-/\|#[\]():]')
+    wordBreaker = re.compile(u'[ _\\-/\\|#[\\]():]')
     stripChars = u' \t,'
 
     titles = True
diff --git a/scripts/category.py b/scripts/category.py
index 2236f11..4e2fcc2 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -303,7 +303,7 @@
         site = pagelink.site
         # regular expression that matches a name followed by a space and
         # disambiguation brackets. Group 1 is the name without the rest.
-        bracketsR = re.compile('(.*) \(.+?\)')
+        bracketsR = re.compile(r'(.*) \(.+?\)')
         match_object = bracketsR.match(page_name)
         if match_object:
             page_name = match_object.group(1)
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 7b94bdb..351b040 100644
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -561,6 +561,17 @@
 
 # END OF CONFIGURATION.
 
+SETTINGS_REGEX = r = re.compile(
+    r"<------- ------->\n"
+    r"\*[Nn]ame ?= ?['\"](.*?)['\"]\n"
+    r"\*([Ff]ind|[Ff]indonly)=(.*?)\n"
+    r"\*[Ii]magechanges=(.*?)\n"
+    r"\*[Ss]ummary=['\"](.*?)['\"]\n"
+    r"\*[Hh]ead=['\"](.*?)['\"]\n"
+    r"\*[Tt]ext ?= ?['\"](.*?)['\"]\n"
+    r"\*[Mm]ex ?= ?['\"]?([^\n]*?)['\"]?\n",
+    re.UNICODE | re.DOTALL)
+
 
 class LogIsFull(pywikibot.Error):
 
@@ -1159,19 +1170,9 @@
                 self.settingsData = list()
                 try:
                     testo = wikiPage.get()
-                    r = re.compile(
-                        r"<------- ------->\n"
-                        "\*[Nn]ame ?= ?['\"](.*?)['\"]\n"
-                        "\*([Ff]ind|[Ff]indonly)=(.*?)\n"
-                        "\*[Ii]magechanges=(.*?)\n"
-                        "\*[Ss]ummary=['\"](.*?)['\"]\n"
-                        "\*[Hh]ead=['\"](.*?)['\"]\n"
-                        "\*[Tt]ext ?= ?['\"](.*?)['\"]\n"
-                        "\*[Mm]ex ?= ?['\"]?([^\n]*?)['\"]?\n",
-                        re.UNICODE | re.DOTALL)
                     number = 1
 
-                    for m in r.finditer(testo):
+                    for m in SETTINGS_REGEX.finditer(testo):
                         name = str(m.group(1))
                         find_tipe = str(m.group(2))
                         find = str(m.group(3))
diff --git a/scripts/commonscat.py b/scripts/commonscat.py
index b078987..90343ee 100755
--- a/scripts/commonscat.py
+++ b/scripts/commonscat.py
@@ -219,6 +219,8 @@
            u'分类重定向', u'追蹤分類', u'共享資源', u'追蹤分類'],
 }
 
+TEMPLATE_REGEX = u'(?i)\\{\\{%s\\|?[^{}]*(?:\\{\\{.*\\}\\})?\\}\\}'
+
 
 class CommonscatBot(Bot):
 
@@ -348,23 +350,20 @@
         if not linktitle and (page.title().lower() in oldcat.lower() or
                               oldcat.lower() in page.title().lower()):
             linktitle = oldcat
+
         if linktitle and newcat != page.title(withNamespace=False):
-            newtext = re.sub(u'(?i)\{\{%s\|?[^{}]*(?:\{\{.*\}\})?\}\}'
-                             % oldtemplate,
-                             u'{{%s|%s|%s}}' % (newtemplate, newcat, 
linktitle),
-                             page.get())
+            replacement_text = u'{{%s|%s|%s}}'
         elif newcat == page.title(withNamespace=False):
-            newtext = re.sub(u'(?i)\{\{%s\|?[^{}]*(?:\{\{.*\}\})?\}\}'
-                             % oldtemplate,
-                             u'{{%s}}' % newtemplate,
-                             page.get())
+            replacement_text = u'{{%s}}' % newtemplate
         elif oldcat.strip() != newcat:  # strip trailing white space
-            newtext = re.sub(u'(?i)\{\{%s\|?[^{}]*(?:\{\{.*\}\})?\}\}'
-                             % oldtemplate,
-                             u'{{%s|%s}}' % (newtemplate, newcat),
-                             page.get())
+            replacement_text = u'{{%s|%s}}' % (newtemplate, newcat)
         else:  # nothing left to do
             return
+
+        newtext = re.sub(TEMPLATE_REGEX % oldtemplate,
+                         replacement_text,
+                         page.text)
+
         if self.getOption('summary'):
             comment = self.getOption('summary')
         else:
diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py
index 3d5f5b2..379904c 100755
--- a/scripts/cosmetic_changes.py
+++ b/scripts/cosmetic_changes.py
@@ -143,10 +143,10 @@
 deprecatedTemplates = {
     'wikipedia': {
         'de': [
-            (u'Belege', u'Belege fehlen\g<parameters>'),
-            (u'Quelle', u'Belege fehlen\g<parameters>'),
-            (u'Quellen', u'Belege fehlen\g<parameters>'),
-            (u'Quellen fehlen', u'Belege fehlen\g<parameters>'),
+            (u'Belege', u'Belege fehlen\\g<parameters>'),
+            (u'Quelle', u'Belege fehlen\\g<parameters>'),
+            (u'Quellen', u'Belege fehlen\\g<parameters>'),
+            (u'Quellen fehlen', u'Belege fehlen\\g<parameters>'),
         ],
     }
 }
@@ -337,7 +337,7 @@
             # Removing the stars' issue
             starstext = textlib.removeDisabledParts(text)
             for star in starsList:
-                regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
+                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                    % star, re.I)
                 found = regex.findall(starstext)
                 if found != []:
@@ -431,8 +431,8 @@
                 text = textlib.replaceExcept(
                     text,
                     r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) +
-                    ') *(?P<right>(\|.*?)?\]\])',
-                    r'[[\g<left>' + aliases[0] + '\g<right>', exceptions)
+                    r') *(?P<right>(\|.*?)?\]\])',
+                    r'[[\g<left>' + aliases[0] + r'\g<right>', exceptions)
         return text
 
     def cleanUpLinks(self, text):
@@ -637,7 +637,7 @@
             text = textlib.replaceExcept(
                 text,
                 
r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
-                '\g<bullet> \g<char>',
+                r'\g<bullet> \g<char>',
                 exceptions)
         return text
 
@@ -809,8 +809,8 @@
         # do not change inside file links
         namespaces = list(self.site.namespace(6, all=True))
         pattern = re.compile(
-            u'\[\[(' + '|'.join(namespaces) +
-            '):.+?\.\w+? *(\|((\[\[.*?\]\])|.)*)?\]\]',
+            u'\\[\\[(' + '|'.join(namespaces) +
+            u'):.+?\\.\\w+? *(\\|((\\[\\[.*?\\]\\])|.)*)?\\]\\]',
             re.UNICODE)
         # not to let bot edits in latin content
         exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
@@ -834,7 +834,7 @@
         for i in range(0, 10):
             text = textlib.replaceExcept(text, old[i], new[i], exceptions)
         # do not change digits in class, style and table params
-        pattern = re.compile(u'\w+=(".+?"|\d+)', re.UNICODE)
+        pattern = re.compile(u'\\w+=(".+?"|\\d+)', re.UNICODE)
         exceptions.append(pattern)
         # do not change digits inside html-tags
         pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
diff --git a/scripts/image.py b/scripts/image.py
index 00758e4..9434229 100644
--- a/scripts/image.py
+++ b/scripts/image.py
@@ -155,7 +155,10 @@
 
         if self.new_image:
             if not self.getOption('loose'):
-                replacements.append((image_regex, '[[' + 
self.site.image_namespace() + ':' + self.new_image + '\g<parameters>]]'))
+                replacements.append((image_regex,
+                                     r'[[%s:%s\g<parameters>]]'
+                                     % (self.site.image_namespace(),
+                                        self.new_image)))
             else:
                 replacements.append((image_regex, self.new_image))
         else:
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index ab8d03f..571b486 100644
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -147,10 +147,10 @@
         return [], [], []
 
     commonsenseRe = re.compile(
-        
'^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s(?P<usage>(.*))\s'
-        '#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)'
-        '#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s'
-        '#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$',
+        
r'^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s(?P<usage>(.*))\s'
+        r'#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)'
+        r'#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s'
+        r'#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$',
         re.MULTILINE + re.DOTALL)
 
     gotInfo = False
@@ -275,7 +275,7 @@
     project = ''
     article = ''
     usageRe = re.compile(
-        '^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
+        r'^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
     matches = usageRe.search(use)
     if matches:
         if matches.group('lang'):
@@ -377,7 +377,7 @@
         toFilter = toFilter + "[[Category:" + cat + "]]\n"
     parameters = urlencode({'source': toFilter.encode('utf-8'),
                                    'bot': '1'})
-    filterCategoriesRe = re.compile('\[\[Category:([^\]]*)\]\]')
+    filterCategoriesRe = re.compile(r'\[\[Category:([^\]]*)\]\]')
     try:
         filterCategoriesPage = urlopen(
            "https://toolserver.org/~multichill/filtercats.php?%s" % 
parameters)
@@ -416,10 +416,10 @@
 def removeTemplates(oldtext=u''):
     """Remove {{Uncategorized}} and {{Check categories}} templates."""
     result = re.sub(
-        u'\{\{\s*([Uu]ncat(egori[sz]ed( 
image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*\}\}', u'', oldtext)
+        u'{{\\s*([Uu]ncat(egori[sz]ed( 
image)?)?|[Nn]ocat|[Nn]eedscategory)[^}]*}}', u'', oldtext)
     result = re.sub(u'<!-- Remove this line once you have added categories 
-->',
                     u'', result)
-    result = re.sub(u'\{\{\s*[Cc]heck categories[^}]*\}\}', u'', result)
+    result = re.sub(u'\\{\\{\\s*[Cc]heck categories[^}]*\\}\\}', u'', result)
     return result
 
 
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 93cb2f8..f17bc0c 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -1850,7 +1850,7 @@
         interwikis = [pywikibot.Page(l) for l in page.iterlanglinks()]
 
         # remove interwiki links to ignore
-        for iw in re.finditer('<!-- *\[\[(.*?:.*?)\]\] *-->', pagetext):
+        for iw in re.finditer(r'<!-- *\[\[(.*?:.*?)\]\] *-->', pagetext):
             try:
                 ignorepage = pywikibot.Page(page.site, iw.groups()[0])
                 if (new[ignorepage.site] == ignorepage) and \
diff --git a/scripts/maintenance/compat2core.py 
b/scripts/maintenance/compat2core.py
index a8b2c58..386d509 100644
--- a/scripts/maintenance/compat2core.py
+++ b/scripts/maintenance/compat2core.py
@@ -52,32 +52,33 @@
     ('import catlib\r?\n', ''),
     ('import userlib\r?\n', ''),
     # change wikipedia to pywikibot, exclude URLs
-    ('(?<!\.)wikipedia\.', u'pywikibot.'),
+    (r'(?<!\.)wikipedia\.', u'pywikibot.'),
     # site instance call
-    ('pywikibot\.getSite\s*\(\s*', 'pywikibot.Site('),
+    (r'pywikibot\.getSite\s*\(\s*', 'pywikibot.Site('),
     # lang is different from code. We should use code in core
-    ('([Ss])ite.lang(?:uage\(\))?', r'\1ite.code'),
+    (r'([Ss])ite.lang(?:uage\(\))?', r'\1ite.code'),
     # change compat library classes to pywikibot intrinsic classes
-    ('catlib\.Category\s*\(\s*', 'pywikibot.Category('),
-    ('catlib\.change_category\s*\((\s*)(?P<article>.+?),\s*(?P<oldcat>.+?),',
+    (r'catlib\.Category\s*\(\s*', 'pywikibot.Category('),
+    (r'catlib\.change_category\s*\((\s*)(?P<article>.+?),\s*(?P<oldcat>.+?),',
      r'\g<article>.change_category(\1\g<oldcat>,'),
-    ('userlib\.User\s*\(\s*', 'pywikibot.User('),
+    (r'userlib\.User\s*\(\s*', 'pywikibot.User('),
     # change ImagePage to FilePage
-    ('pywikibot\.ImagePage\s*\(\s*', 'pywikibot.FilePage('),
+    (r'pywikibot\.ImagePage\s*\(\s*', 'pywikibot.FilePage('),
     # deprecated title methods
-    ('\.urlname\s*\(\s*\)', '.title(asUrl=True)'),
-    ('\.urlname\s*\(\s*(?:withNamespace\s*=\s*)?(True|False)+\s*\)',
+    (r'\.urlname\s*\(\s*\)', '.title(asUrl=True)'),
+    (r'\.urlname\s*\(\s*(?:withNamespace\s*=\s*)?(True|False)+\s*\)',
      r'.title(asUrl=True, withNamespace=\1)'),
-    ('\.titleWithoutNamespace\s*\(\s*\)', '.title(withNamespace=False)'),
-    ('\.sectionFreeTitle\s*\(\s*\)', '.title(withSection=False)'),
-    ('\.aslink\s*\(\s*\)', '.title(asLink=True)'),
+    (r'\.titleWithoutNamespace\s*\(\s*\)', '.title(withNamespace=False)'),
+    (r'\.sectionFreeTitle\s*\(\s*\)', '.title(withSection=False)'),
+    (r'\.aslink\s*\(\s*\)', '.title(asLink=True)'),
     # other deprecated methods
-    ('(?<!site)\.encoding\s*\(\s*\)', '.site.encoding()'),
-    ('\.newimages\s*\(', '.newfiles('),
+    (r'(?<!site)\.encoding\s*\(\s*\)', '.site.encoding()'),
+    (r'\.newimages\s*\(', '.newfiles('),
     # new core methods
-    ('\.get\s*\(\s*get_redirect\s*=\s*True\s*\)', '.text'),
+    (r'\.get\s*\(\s*get_redirect\s*=\s*True\s*\)', '.text'),
     # stopme() is done by the framework itself
-    
('(\s*)try\:\s*\r?\n\s+main\(\)\s*\r?\n\s*finally\:\s*\r?\n\s+pywikibot\.stopme\(\)',
+    (r'(\s*)try\:\s*\r?\n\s+main\(\)\s*\r?\n\s*finally\:\s*\r?\n'
+     r'\s+pywikibot\.stopme\(\)',
      r'\1main()'),
 )
 
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index f50083b..ae63562 100644
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -122,7 +122,7 @@
 # Extracts the domain name
 domain = re.compile(r'^(\w+)://(?:www.|)([^/]+)')
 
-globalbadtitles = """
+globalbadtitles = r"""
 # is
 (test|
 # starts with
@@ -306,9 +306,9 @@
         self.REFS = re.compile(
             u'(?i)<ref(?P<params>[^>/]*)>(?P<content>.*?)</ref>')
         self.NAMES = re.compile(
-            u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.+)\s*(?P=quote).*')
+            
u'(?i).*name\\s*=\\s*(?P<quote>"?)\\s*(?P<name>.+)\\s*(?P=quote).*')
         self.GROUPS = re.compile(
-            u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.+)\s*(?P=quote).*')
+            
u'(?i).*group\\s*=\\s*(?P<quote>"?)\\s*(?P<group>.+)\\s*(?P=quote).*')
         self.autogen = i18n.twtranslate(pywikibot.Site(), 'reflinks-autogen')
 
     def process(self, text):
@@ -401,7 +401,7 @@
             if v[1]:
                 name = u'"%s"' % name
             text = re.sub(
-                u'<ref name\s*=\s*(?P<quote>"?)\s*%s\s*(?P=quote)\s*/>' % k,
+                u'<ref name\\s*=\\s*(?P<quote>"?)\\s*%s\\s*(?P=quote)\\s*/>' % 
k,
                 u'<ref name=%s />' % name, text)
         return text
 
@@ -662,7 +662,7 @@
                         s = self.CHARSET.search(tag)
                 if s:
                     tmp = s.group('enc').strip("\"' ").lower()
-                    naked = re.sub('[ _\-]', '', tmp)
+                    naked = re.sub(r'[ _\-]', '', tmp)
                     # Convert to python correct encoding names
                     if naked == "gb2312":
                         enc.append("gbk")
diff --git a/scripts/replace.py b/scripts/replace.py
index 95fe144..cbc9a43 100755
--- a/scripts/replace.py
+++ b/scripts/replace.py
@@ -627,9 +627,9 @@
 
 def prepareRegexForMySQL(pattern):
     """Convert regex to MySQL syntax."""
-    pattern = pattern.replace('\s', '[:space:]')
-    pattern = pattern.replace('\d', '[:digit:]')
-    pattern = pattern.replace('\w', '[:alnum:]')
+    pattern = pattern.replace(r'\s', '[:space:]')
+    pattern = pattern.replace(r'\d', '[:digit:]')
+    pattern = pattern.replace(r'\w', '[:alnum:]')
 
     pattern = pattern.replace("'", "\\" + "'")
     # pattern = pattern.replace('\\', '\\\\')
diff --git a/scripts/revertbot.py b/scripts/revertbot.py
index 161263e..0815a15 100644
--- a/scripts/revertbot.py
+++ b/scripts/revertbot.py
@@ -131,7 +131,7 @@
         if 'top' in item:
             page = pywikibot.Page(self.site, item['title'])
             text = page.get(get_redirect=True)
-            pattern = re.compile(u'\[\[.+?:.+?\..+?\]\]', re.UNICODE)
+            pattern = re.compile(u'\\[\\[.+?:.+?\\..+?\\]\\]', re.UNICODE)
             return pattern.search(text) >= 0
         return False
 
diff --git a/scripts/script_wui.py b/scripts/script_wui.py
index 7250589..fd6e792 100755
--- a/scripts/script_wui.py
+++ b/scripts/script_wui.py
@@ -280,7 +280,7 @@
     # (might be a problem here for TS and SGE, output string has another 
encoding)
     if False:
         buffer = buffer.decode(pywikibot.config.console_encoding)
-    buffer = re.sub("\03\{(.*?)\}(.*?)\03\{default\}", "\g<2>", buffer)
+    buffer = re.sub(r'\03\{(.*?)\}(.*?)\03\{default\}', r'\g<2>', buffer)
     if rev is None:
         rev = page.latestRevision()
         link = page.permalink(oldid=rev)
diff --git a/scripts/selflink.py b/scripts/selflink.py
index 1687c97..734cf39 100644
--- a/scripts/selflink.py
+++ b/scripts/selflink.py
@@ -53,7 +53,7 @@
         self.linkR = re.compile(
             r'\[\[(?P<title>[^\]\|#]*)'
             r'(?P<section>#[^\]\|]*)?'
-            '(\|(?P<label>[^\]]*))?\]\]'
+            r'(\|(?P<label>[^\]]*))?\]\]'
             r'(?P<linktrail>' + linktrail + ')')
 
     def handleNextLink(self, page, match, context=100):
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py
index f25f617..f36b28e 100644
--- a/scripts/solve_disambiguation.py
+++ b/scripts/solve_disambiguation.py
@@ -202,7 +202,7 @@
         'en': [
             u'Wikipedia:Links to disambiguating pages',
             u'Wikipedia:Disambiguation pages with links',
-            u'Wikipedia:Multiple-place names \([A-Z]\)',
+            u'Wikipedia:Multiple-place names \\([A-Z]\\)',
             u'Wikipedia:Non-unique personal name',
             u"User:Jerzy/Disambiguation Pages i've Editted",
             u'User:Gareth Owen/inprogress',
@@ -252,7 +252,7 @@
             # hu:Wikipédia:Kocsmafal (egyéb)#Hol nem kell egyértelműsíteni?
             # 2012-02-08
             u'Wikipédia:(?!Sportműhely/Eddigi cikkeink).*',
-            u'.*\(egyértelműsítő lap\)$',
+            u'.*\\(egyértelműsítő lap\\)$',
             u'.*[Vv]ita:.*',
             u'Szerkesztő:[^/]+$',
         ],
@@ -293,7 +293,7 @@
             u'Overleg Wikipedia:Logboek.*',
             u'Wikipedia:Logboek.*',
             u'Overleg gebruiker:Sybren/test.*',
-            u'Overleg 
gebruiker:[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?',
+            u'Overleg gebruiker:([0-9][0-9]?[0-9]?\\.){3}[0-9][0-9]?[0-9]?',
             u'Overleg:Lage Landen (staatkunde)',
             u'Wikipedia:.*[aA]rchief.*',
             u'Wikipedia:Doorverwijspagina',
@@ -757,7 +757,7 @@
                     search_text = text[m.end():m.end() + context]
                     # figure out where the link (and sentance) ends, put note
                     # there
-                    end_of_word_match = re.search("\s", search_text)
+                    end_of_word_match = re.search(r'\s', search_text)
                     if end_of_word_match:
                         position_split = end_of_word_match.start(0)
                     else:
diff --git a/scripts/template.py b/scripts/template.py
index de98b32..25aad55 100755
--- a/scripts/template.py
+++ b/scripts/template.py
@@ -265,11 +265,11 @@
 
             if self.getOption('subst') and self.getOption('remove'):
                 replacements.append((templateRegex,
-                                     '{{subst:%s\g<parameters>}}' % new))
+                                     r'{{subst:%s\g<parameters>}}' % new))
                 exceptions['inside-tags'] = ['ref', 'gallery']
             elif self.getOption('subst'):
                 replacements.append((templateRegex,
-                                     '{{subst:%s\g<parameters>}}' % old))
+                                     r'{{subst:%s\g<parameters>}}' % old))
                 exceptions['inside-tags'] = ['ref', 'gallery']
             elif self.getOption('remove'):
                 replacements.append((templateRegex, ''))
@@ -281,7 +281,7 @@
                                               default=False, 
automatic_quit=False):
                         continue
                 replacements.append((templateRegex,
-                                     '{{%s\g<parameters>}}' % new))
+                                     r'{{%s\g<parameters>}}' % new))
 
         replaceBot = replace.ReplaceRobot(self.generator, replacements,
                                           exceptions, 
acceptall=self.getOption('always'),
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 4c36069..968d763 100644
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -128,26 +128,30 @@
     # Officially reserved for testing, documentation, etc. in
     # https://tools.ietf.org/html/rfc2606#page-2
     # top-level domains:
-    re.compile('.*[\./@]test(/.*)?'),
-    re.compile('.*[\./@]example(/.*)?'),
-    re.compile('.*[\./@]invalid(/.*)?'),
-    re.compile('.*[\./@]localhost(/.*)?'),
+    re.compile(r'.*[\./@]test(/.*)?'),
+    re.compile(r'.*[\./@]example(/.*)?'),
+    re.compile(r'.*[\./@]invalid(/.*)?'),
+    re.compile(r'.*[\./@]localhost(/.*)?'),
     # second-level domains:
-    re.compile('.*[\./@]example\.com(/.*)?'),
-    re.compile('.*[\./@]example\.net(/.*)?'),
-    re.compile('.*[\./@]example\.org(/.*)?'),
+    re.compile(r'.*[\./@]example\.com(/.*)?'),
+    re.compile(r'.*[\./@]example\.net(/.*)?'),
+    re.compile(r'.*[\./@]example\.org(/.*)?'),
 
     # Other special cases
-    re.compile('.*[\./@]gso\.gbv\.de(/.*)?'),  # bot somehow can't handle 
their redirects
-    re.compile('.*[\./@]berlinonline\.de(/.*)?'),
+    # bot somehow can't handle their redirects:
+    re.compile(r'.*[\./@]gso\.gbv\.de(/.*)?'),
+    re.compile(r'.*[\./@]berlinonline\.de(/.*)?'),
     # above entry to be manually fixed per request at 
[[de:Benutzer:BLueFiSH.as/BZ]]
-    re.compile('.*[\./@]bodo\.kommune\.no(/.*)?'),  # bot can't handle their 
redirects
-    re.compile('.*[\./@]jpl\.nasa\.gov(/.*)?'),  # bot rejected on the site
-    re.compile('.*[\./@]itis\.gov(/.*)?'),  # bot rejected on the site
-    re.compile('.*[\./@]cev\.lu(/.*)?'),  # bot rejected on the site
-    re.compile('.*[\./@]science\.ksc\.nasa\.gov(/.*)?'),  # very slow response 
resulting in bot error
-    re.compile('.*[\./@]britannica\.com(/.*)?'),  # HTTP redirect loop
-    re.compile('.*[\./@]quickfacts\.census\.gov(/.*)?'),  # bot rejected on 
the site
+    # bot can't handle their redirects:
+    re.compile(r'.*[\./@]bodo\.kommune\.no(/.*)?'),
+    re.compile(r'.*[\./@]jpl\.nasa\.gov(/.*)?'),  # bot rejected on the site
+    re.compile(r'.*[\./@]itis\.gov(/.*)?'),  # bot rejected on the site
+    re.compile(r'.*[\./@]cev\.lu(/.*)?'),  # bot rejected on the site
+    # very slow response resulting in bot error:
+    re.compile(r'.*[\./@]science\.ksc\.nasa\.gov(/.*)?'),
+    re.compile(r'.*[\./@]britannica\.com(/.*)?'),  # HTTP redirect loop
+    # bot rejected on the site:
+    re.compile(r'.*[\./@]quickfacts\.census\.gov(/.*)?'),
 ]
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/189914
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8bcea1d5db3b5d0c2c9ada19382ada01f7339044
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to