jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Replace: Support multiple replacements
......................................................................
[FEAT] Replace: Support multiple replacements
This supports multiple -fix and command line replacements at once. Each
fix set and also each fix in that set could define its own edit summary
(the third value in the tuple).
If no user-defined summary is given, it will only use the summaries of
the applied replacements. For the replacements given via the command
line it will create one merged replacement message. The rest are added
sorted alphabetically after it.
Change-Id: Ic14a8236d143050b267bee9308006b4d14d8ca6d
---
M scripts/replace.py
1 file changed, 160 insertions(+), 103 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
XZise: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/scripts/replace.py b/scripts/replace.py
index cd9fcfb..014e1dd 100755
--- a/scripts/replace.py
+++ b/scripts/replace.py
@@ -81,6 +81,8 @@
-allowoverlap When occurences of the pattern overlap, replace all of them.
Be careful, this might lead to an infinite loop.
+-fullsummary Use one large summary for all command line replacements.
+
other: First argument is the old text, second argument is the new
text. If the -regex argument is given, the first argument
will be regarded as a regular expression, and the second
@@ -145,6 +147,54 @@
'&params;': pagegenerators.parameterHelp,
'&fixes-help;': fixes.help,
}
+
+
+def precompile_exceptions(exceptions, use_regex, flags):
+ if not exceptions:
+ return
+ for exceptionCategory in [
+ 'title', 'require-title', 'text-contains', 'inside']:
+ if exceptionCategory in exceptions:
+ patterns = exceptions[exceptionCategory]
+ if not use_regex:
+ patterns = [re.escape(pattern) for pattern in patterns]
+ patterns = [re.compile(pattern, flags) for pattern in patterns]
+ exceptions[exceptionCategory] = patterns
+
+
+class Replacement(object):
+
+ """The replacement instructions."""
+
+ def __init__(self, old, new, use_regex=None, exceptions=None,
+ case_insensitive=None, edit_summary=None,
+ default_summary=True):
+ self.old = old
+ self.old_regex = None
+ self.new = new
+ self.use_regex = use_regex
+ self.exceptions = exceptions
+ self.case_insensitive = case_insensitive
+ self.edit_summary = edit_summary
+ self.default_summary = default_summary
+
+ def compile(self, flags, use_regex):
+ # Set the regular aexpression flags
+ flags |= re.UNICODE
+
+ if self.case_insensitive is False:
+ flags &= ~re.IGNORECASE
+ elif self.case_insensitive:
+ flags |= re.IGNORECASE
+
+ if self.use_regex is not None:
+ use_regex = self.use_regex # this replacement overrides it
+ if not use_regex:
+ self.old_regex = re.escape(self.old)
+ else:
+ self.old_regex = self.old
+ self.old_regex = re.compile(self.old_regex, flags)
+ precompile_exceptions(self.exceptions, use_regex, flags)
class XmlDumpReplacePageGenerator(object):
@@ -328,30 +378,62 @@
return True
return False
- def doReplacements(self, original_text):
+ def apply_replacements(self, original_text, applied):
"""
Apply all replacements to the given text.
- @rtype: unicode
+ @rtype: unicode, set
"""
+ def get_exceptions(exceptions):
+ return exceptions.get('inside-tags', []) +
exceptions.get('inside', [])
new_text = original_text
- exceptions = []
- if "inside-tags" in self.exceptions:
- exceptions += self.exceptions['inside-tags']
- if "inside" in self.exceptions:
- exceptions += self.exceptions['inside']
- for old, new in self.replacements:
+ exceptions = get_exceptions(self.exceptions)
+ for replacement in self.replacements:
if self.sleep is not None:
time.sleep(self.sleep)
- new_text = textlib.replaceExcept(new_text, old, new, exceptions,
- allowoverlap=self.allowoverlap,
- site=self.site)
+ old_text = new_text
+ new_text = textlib.replaceExcept(
+ new_text, replacement.old_regex, replacement.new,
+ exceptions + get_exceptions(replacement.exceptions or {}),
+ allowoverlap=self.allowoverlap, site=self.site)
+ if old_text != new_text:
+ applied.add(replacement)
+
return new_text
+
+ def doReplacements(self, original_text):
+ return self.apply_replacements(original_text, set())
def count_changes(self, page, err):
"""Count succesfully changed pages."""
if not isinstance(err, Exception):
self.changed_pages += 1
+
+ def generate_summary(self, applied_replacements):
+ """Generate a summary message for the replacements."""
+ # all replacements which are merged into the default message
+ default_summaries = set()
+ # all message parts
+ summary_messages = set()
+ for replacement in applied_replacements:
+ if replacement.edit_summary:
+ summary_messages.add(replacement.edit_summary)
+ elif replacement.default_summary:
+ default_summaries.add((replacement.old, replacement.new))
+ summary_messages = sorted(summary_messages)
+ if default_summaries:
+ if self.summary:
+ summary_messages.insert(0, self.summary)
+ else:
+ default_summary = ', '.join(
+ u'-{0} +{1}'.format(default_summary)
+ for default_summary in default_summaries)
+ summary_messages.insert(0, i18n.twtranslate(
+ pywikibot.Site(), 'replace-replacing',
+ {'description':
+ u' ({0})'.format(default_summary)}
+ ))
+ return u'; '.join(summary_messages)
def run(self):
"""Start the bot."""
@@ -373,6 +455,7 @@
except pywikibot.NoPage:
pywikibot.output(u'Page %s not found' %
page.title(asLink=True))
continue
+ applied = set()
new_text = original_text
while True:
if self.isTextExcepted(new_text):
@@ -380,16 +463,16 @@
u'that is on the exceptions list.'
% page.title(asLink=True))
break
- new_text = self.doReplacements(new_text)
+ last_text = None
+ while new_text != last_text:
+ last_text = new_text
+ new_text = self.apply_replacements(last_text, applied)
+ if not self.recursive:
+ break
if new_text == original_text:
pywikibot.output(u'No changes were necessary in %s'
% page.title(asLink=True))
break
- if self.recursive:
- newest_text = self.doReplacements(new_text)
- while (newest_text != new_text):
- new_text = newest_text
- newest_text = self.doReplacements(new_text)
if hasattr(self, "addedCat"):
cats = page.categories(nofollow_redirects=True)
if self.addedCat not in cats:
@@ -433,12 +516,12 @@
if choice == 'a':
self.acceptall = True
if choice == 'y':
- page.put_async(new_text, self.summary,
callback=self.count_changes)
+ page.put_async(new_text, self.generate_summary(applied),
callback=self.count_changes)
# choice must be 'N'
break
if self.acceptall and new_text != original_text:
try:
- page.put(new_text, self.summary,
callback=self.count_changes)
+ page.put(new_text, self.generate_summary(applied),
callback=self.count_changes)
except pywikibot.EditConflict:
pywikibot.output(u'Skipping %s because of edit conflict'
% (page.title(),))
@@ -479,7 +562,6 @@
add_cat = None
gen = None
# summary message
- summary_commandline = False
edit_summary = u""
# Array which will collect commandline parameters.
# First element is original text, second element is replacement text.
@@ -499,7 +581,7 @@
# as regular expressions?
regex = False
# Predefined fixes from dictionary 'fixes' (see above).
- fix = None
+ fixes_set = []
# the dump's path, either absolute or relative, which will be used
# if -xml flag is present
xmlFilename = None
@@ -555,7 +637,7 @@
elif arg.startswith('-exceptinsidetag:'):
exceptions['inside-tags'].append(arg[17:])
elif arg.startswith('-fix:'):
- fix = arg[5:]
+ fixes_set += [arg[5:]]
elif arg.startswith('-sleep:'):
sleep = float(arg[7:])
elif arg == '-always':
@@ -572,7 +654,6 @@
add_cat = arg[8:]
elif arg.startswith('-summary:'):
edit_summary = arg[9:]
- summary_commandline = True
elif arg.startswith('-allowoverlap'):
allowoverlap = True
else:
@@ -582,83 +663,70 @@
if (len(commandline_replacements) % 2):
raise pywikibot.Error('require even number of replacements.')
- elif (len(commandline_replacements) == 2 and fix is None):
- replacements.append((commandline_replacements[0],
- commandline_replacements[1]))
- if not summary_commandline:
- edit_summary = i18n.twtranslate(
- site, 'replace-replacing',
- {'description': ' (-%s +%s)' % (commandline_replacements[0],
- commandline_replacements[1])}
- )
- elif (len(commandline_replacements) > 1):
- if (fix is None):
- for i in range(0, len(commandline_replacements), 2):
- replacements.append((commandline_replacements[i],
- commandline_replacements[i + 1]))
- if not summary_commandline:
- pairs = [(commandline_replacements[i],
- commandline_replacements[i + 1])
- for i in range(0, len(commandline_replacements), 2)]
- replacementsDescription = '(%s)' % ', '.join(
- [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
- edit_summary = i18n.twtranslate(site,
- 'replace-replacing',
- {'description':
- replacementsDescription})
+ if not commandline_replacements:
+ if fixes_set:
+ manual = pywikibot.input_yn('Replacements via -fix: set. Apply '
+ 'also manual replacements?',
default=False)
else:
- raise pywikibot.Error(
- 'Specifying -fix with replacements is undefined')
- elif fix is None:
- old = pywikibot.input(u'Please enter the text that should be
replaced:')
- new = pywikibot.input(u'Please enter the new text:')
- change = '(-' + old + ' +' + new
- replacements.append((old, new))
- while True:
- old = pywikibot.input(
- u'Please enter another text that should be replaced,' +
- u'\nor press Enter to start:')
- if old == '':
- change += ')'
- break
- new = i18n.input('pywikibot-enter-new-text')
- change += ' & -' + old + ' +' + new
- replacements.append((old, new))
- if not summary_commandline:
- default_summary_message = i18n.twtranslate(site,
- 'replace-replacing',
- {'description': change})
- pywikibot.output(u'The summary message will default to: %s'
- % default_summary_message)
- summary_message = pywikibot.input(
- u'Press Enter to use this default message, or enter a ' +
- u'description of the\nchanges your bot will make:')
- if summary_message == '':
- summary_message = default_summary_message
- edit_summary = summary_message
+ manual = True
+ if manual:
+ old = pywikibot.input(u'Please enter the text that should be
replaced:')
+ while old:
+ new = pywikibot.input(u'Please enter the new text:')
+ commandline_replacements += [old, new]
+ old = pywikibot.input(
+ u'Please enter another text that should be replaced,' +
+ u'\nor press Enter to start:')
- else:
- # Perform one of the predefined actions.
+ single_summary = None
+ for i in range(0, len(commandline_replacements), 2):
+ replacement = Replacement(commandline_replacements[i],
+ commandline_replacements[i + 1])
+ if not single_summary:
+ single_summary = i18n.twtranslate(
+ site, 'replace-replacing',
+ {'description':
+ ' (-%s +%s)' % (replacement.old, replacement.new)}
+ )
+ replacements.append(replacement)
+
+ if not edit_summary:
+ if single_summary:
+ pywikibot.output(u'The summary message for the command line '
+ 'replacements will be something like: %s'
+ % single_summary)
+ if fixes_set:
+ pywikibot.output('If a summary is defined for the fix, this '
+ 'default summary won\'t be applied.')
+ edit_summary = pywikibot.input(
+ u'Press Enter to use this automatic message, or enter a ' +
+ u'description of the\nchanges your bot will make:')
+
+ # Perform one of the predefined actions.
+ for fix in fixes_set:
try:
fix = fixes.fixes[fix]
except KeyError:
pywikibot.output(u'Available predefined fixes are: %s'
% ', '.join(fixes.fixes.keys()))
return
- if "regex" in fix:
- regex = fix['regex']
if "msg" in fix:
if isinstance(fix['msg'], basestring):
- edit_summary = i18n.twtranslate(site,
- str(fix['msg']))
+ set_summary = i18n.twtranslate(site, str(fix['msg']))
else:
- edit_summary = i18n.translate(site,
- fix['msg'], fallback=True)
- if "exceptions" in fix:
- exceptions = fix['exceptions']
- if "nocase" in fix:
- caseInsensitive = fix['nocase']
- replacements = fix['replacements']
+ set_summary = i18n.translate(site, fix['msg'], fallback=True)
+ else:
+ set_summary = None
+ for replacement in fix['replacements']:
+ summary = set_summary if len(replacement) < 3 else replacement[2]
+ replacements.append(Replacement(
+ old=replacement[0],
+ new=replacement[1],
+ use_regex=fix.get('regex'),
+ edit_summary=summary,
+ exceptions=fix.get('exceptions'),
+ case_insensitive=fix.get('nocase')
+ ))
# Set the regular expression flags
flags = re.UNICODE
@@ -670,21 +738,10 @@
flags = flags | re.MULTILINE
# Pre-compile all regular expressions here to save time later
- for i in range(len(replacements)):
- old, new = replacements[i]
- if not regex:
- old = re.escape(old)
- oldR = re.compile(old, flags)
- replacements[i] = oldR, new
+ for replacement in replacements:
+ replacement.compile(regex, flags)
- for exceptionCategory in [
- 'title', 'require-title', 'text-contains', 'inside']:
- if exceptionCategory in exceptions:
- patterns = exceptions[exceptionCategory]
- if not regex:
- patterns = [re.escape(pattern) for pattern in patterns]
- patterns = [re.compile(pattern, flags) for pattern in patterns]
- exceptions[exceptionCategory] = patterns
+ precompile_exceptions(exceptions, regex, flags)
if xmlFilename:
try:
--
To view, visit https://gerrit.wikimedia.org/r/175228
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic14a8236d143050b267bee9308006b4d14d8ca6d
Gerrit-PatchSet: 10
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Martineznovo <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits