jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] unlink: Use replace_links
......................................................................
[FEAT] unlink: Use replace_links
This uses replace_links to unlink all the links via the unlink script. It
modifies InteractiveReplace to also store the current link information and adds
a way to ask the user again. It also adds an option to increase the context.
Change-Id: I7056c773761a5d1673d6e1d0ad5252861ff0d02f
---
M pywikibot/bot.py
M scripts/unlink.py
2 files changed, 169 insertions(+), 145 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index 9f0e686..2410708 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -77,6 +77,15 @@
self.page = page
+class UnhandledAnswer(Exception):
+
+ """The given answer didn't suffice."""
+
+ def __init__(self, stop=False):
+ """Constructor."""
+ self.stop = stop
+
+
# Logging module configuration
class RotatingFileHandler(logging.handlers.RotatingFileHandler):
@@ -719,6 +728,10 @@
"""Handle this choice. Must be implemented."""
raise NotImplementedError()
+ def handle_link(self):
+ """The current link will be handled by this choice."""
+ return False
+
class StaticChoice(Choice):
@@ -756,6 +769,30 @@
**kwargs)
+class AlwaysChoice(Choice):
+
+ """Add an option to always apply the default."""
+
+ def __init__(self, replacer, option='always', shortcut='a'):
+ """Constructor."""
+ super(AlwaysChoice, self).__init__(option, shortcut, replacer)
+ self.always = False
+
+ def handle(self):
+ """Handle the custom shortcut."""
+ self.always = True
+ return self.answer
+
+ def handle_link(self):
+ """Directly return answer whether it's applying it always."""
+ return self.always
+
+ @property
+ def answer(self):
+ """Get the actual default answer instructing the replacement."""
+ return self.replacer.handle_answer(self.replacer._default)
+
+
class InteractiveReplace(object):
"""
@@ -770,9 +807,11 @@
* allow_replace_all = False (replace target, section and label)
(The boolean values are the default values)
- It has also a 'context' attribute which must be a non-negative integer. If
+ It has also a C{context} attribute which must be a non-negative integer. If
it is greater 0 it shows that many characters before and after the link in
- question.
+ question. The C{context_delta} attribute can be defined too and adds an
+ option to increase C{context} by the given amount each time the option is
+ selected.
Additional choices can be defined using the 'additional_choices' and will be
amended to the choices defined by this class. This list is mutable and the
@@ -788,8 +827,9 @@
@type old_link: Link or Page
@param new_link: The new link with which it should be replaced.
Depending on the replacement mode it'll use this link's label and
- section.
- @type new_link: Link or Page
+ section. If False it'll unlink all and the attributes beginning with
+ allow_replace are ignored.
+ @type new_link: Link or Page or False
@param default: The default answer as the shortcut
@type default: None or str
@param automatic_quit: Add an option to quit and raise a
@@ -808,6 +848,7 @@
self._quit = automatic_quit
self._current_match = None
self.context = 30
+ self.context_delta = 0
self.allow_skip_link = True
self.allow_unlink = True
self.allow_replace = True
@@ -818,15 +859,18 @@
self._own_choices = [
('skip_link', StaticChoice('Do not change', 'n', None)),
('unlink', StaticChoice('Unlink', 'u', False)),
- ('replace', StaticChoice('Change link target', 't',
- self._new.canonical_title())),
- ('replace_section', LinkChoice('Change link target and section',
- 's', self, True)),
- ('replace_label', LinkChoice('Change link target and label',
- 'l', self, False)),
- ('replace_all', StaticChoice('Change complete link', 'c',
- self._new)),
]
+ if self._new:
+ self._own_choices += [
+ ('replace', StaticChoice('Change link target', 't',
+ self._new.canonical_title())),
+ ('replace_section', LinkChoice('Change link target and section',
+ 's', self, True)),
+ ('replace_label', LinkChoice('Change link target and label',
+ 'l', self, False)),
+ ('replace_all', StaticChoice('Change complete link', 'c',
+ self._new)),
+ ]
self.additional_choices = []
@@ -838,42 +882,67 @@
else:
raise ValueError('Invalid choice "{0}"'.format(choice))
+ def __call__(self, link, text, groups, rng):
+ """Ask user how the selected link should be replaced."""
+ if self._old == link:
+ self._current_match = (link, text, groups, rng)
+ while True:
+ try:
+ answer = self.handle_link()
+ except UnhandledAnswer as e:
+ if e.stop:
+ raise
+ else:
+ break
+ self._current_match = None # don't reset in case of an exception
+ return answer
+ else:
+ return None
+
@property
def choices(self):
"""Return the tuple of choices."""
choices = []
for name, choice in self._own_choices:
if getattr(self, 'allow_' + name):
- choices += [choice]
+ choices += [self._own_choices[name]]
+ if self.context_delta > 0:
+ choices += [HighlightContextOption(
+ 'more context', 'm', self.current_text, self.context,
+ self.context_delta, *self.current_range)]
choices += self.additional_choices
return tuple(choices)
- def __call__(self, link, text, groups, rng):
- """Ask user how the selected link should be replaced."""
- if self._old == link:
- self._current_match = (link, text, groups, rng)
- if self.context > 0:
- # at the beginning of the link, start red color.
- # at the end of the link, reset the color to default
- pywikibot.output(text[max(0, rng[0] - self.context): rng[0]] +
- '\03{lightred}' + text[rng[0]: rng[1]] +
- '\03{default}' + text[rng[1]: rng[1] + self.context])
- question = ('Should the link target to '
- '\03{{lightpurple}}{0}\03{{default}}?')
- else:
- question = ('Should the link \03{{lightred}}{1}\03{{default}} '
- 'target to \03{{lightpurple}}{0}\03{{default}}?')
+ def handle_link(self):
+ """Handle the currently given replacement."""
+ choices = self.choices
+ for choice in choices:
+ if isinstance(choice, Choice) and choice.handle_link():
+ return choice.answer
- choice = pywikibot.input_choice(
- question.format(self._new.canonical_title(),
- self._old.canonical_title()),
- self.choices, default=self._default, automatic_quit=self._quit)
-
- answer = self.handle_answer(choice, link)
- self._current_match = None
- return answer
+ if self.context > 0:
+ rng = self.current_range
+ text = self.current_text
+ # at the beginning of the link, start red color.
+ # at the end of the link, reset the color to default
+ pywikibot.output(text[max(0, rng[0] - self.context): rng[0]] +
+ '\03{lightred}' + text[rng[0]: rng[1]] +
+ '\03{default}' + text[rng[1]: rng[1] + self.context])
+ question = 'Should the link '
else:
- return None
+ question = 'Should the link \03{{lightred}}{0}\03{{default}} '
+
+ if self._new is False:
+ question += 'be unlinked?'
+ else:
+ question += 'target to \03{{lightpurple}}{0}\03{{default}}?'.format(
+ self._new.canonical_title())
+
+ choice = pywikibot.input_choice(
+ question.format(self._old.canonical_title()),
+ choices, default=self._default, automatic_quit=self._quit)
+
+ return self.handle_answer(choice)
@property
def current_link(self):
diff --git a/scripts/unlink.py b/scripts/unlink.py
index a5deb82..1b663d8 100755
--- a/scripts/unlink.py
+++ b/scripts/unlink.py
@@ -30,14 +30,51 @@
__version__ = '$Id$'
#
-import re
import pywikibot
-from pywikibot.editor import TextEditor
from pywikibot import i18n
-from pywikibot.bot import SingleSiteBot
+from pywikibot.bot import (
+ SingleSiteBot, ExistingPageBot, NoRedirectPageBot, InteractiveReplace,
+ ChoiceException, UnhandledAnswer, AlwaysChoice,
+)
+from pywikibot.editor import TextEditor
+from pywikibot.textlib import replace_links
-class UnlinkBot(SingleSiteBot):
+class EditReplacement(ChoiceException, UnhandledAnswer):
+
+ """The text should be edited and replacement should be restarted."""
+
+ def __init__(self):
+ """Constructor."""
+ super(EditReplacement, self).__init__('edit', 'e')
+ self.stop = True
+
+
+class InteractiveUnlink(InteractiveReplace):
+
+ """An implementation which just allows unlinking."""
+
+ def __init__(self, bot):
+ """Create default settings."""
+ super(InteractiveUnlink, self).__init__(
+ old_link=bot.pageToUnlink, new_link=False, default='u')
+ self._always = AlwaysChoice(self, 'unlink all pages', 'a')
+ self._always.always = bot.getOption('always')
+ self.additional_choices = [AlwaysChoice(self, 'unlink all on page', 'p'),
+ self._always, EditReplacement()]
+ self._bot = bot
+ self.allow_replace = False
+ self.context = 100
+ self.context_change = 100
+
+ def handle_answer(self, choice):
+ """Handle choice and store in bot's options."""
+ answer = super(InteractiveUnlink, self).handle_answer(choice)
+ self._bot.options['always'] = self._always.always
+ return answer
+
+
+class UnlinkBot(SingleSiteBot, ExistingPageBot, NoRedirectPageBot):
"""Page unlinking bot."""
@@ -51,119 +88,37 @@
super(UnlinkBot, self).__init__(site=pageToUnlink.site, **kwargs)
self.pageToUnlink = pageToUnlink
- linktrail = self.pageToUnlink.site.linktrail()
self.generator = pageToUnlink.getReferences(
namespaces=self.getOption('namespaces'), content=True)
- # The regular expression which finds links. Results consist of four
- # groups:
- #
- # group title is the target page title, that is, everything
- # before | or ].
- #
- # group section is the page section.
- # It'll include the # to make life easier for us.
- #
- # group label is the alternative link title, that's everything
- # between | and ].
- #
- # group linktrail is the link trail, that's letters after ]] which are
- # part of the word.
- # note that the definition of 'letter' varies from language to language.
- self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
- % linktrail)
self.comment = i18n.twtranslate(self.pageToUnlink.site,
'unlink-unlinking',
self.pageToUnlink.title())
- def handleNextLink(self, text, match, context=100):
- """
- Return a tuple (text, jumpToBeginning).
+ def _create_callback(self):
+ """Create a new callback instance for replace_links."""
+ return InteractiveUnlink(self)
- text is the unicode string after the current link has been processed.
- jumpToBeginning is a boolean which specifies if the cursor position
- should be reset to 0. This is required after the user has edited the
- article.
- """
- # ignore interwiki links and links to sections of the same page as well
- # as section links
- if not match.group('title') \
- or self.pageToUnlink.site.isInterwikiLink(match.group('title')) \
- or match.group('section'):
- return text, False
- linkedPage = pywikibot.Page(self.pageToUnlink.site,
- match.group('title'))
- # Check whether the link found is to the current page itself.
- if linkedPage != self.pageToUnlink:
- # not a self-link
- return text, False
- else:
- # at the beginning of the link, start red color.
- # at the end of the link, reset the color to default
- if self.getOption('always'):
- choice = 'a'
- else:
- pywikibot.output(
- text[max(0, match.start() - context):match.start()] +
- '\03{lightred}' + text[match.start():match.end()] +
- '\03{default}' + text[match.end():match.end() + context])
- choice = pywikibot.input_choice(
- u'\nWhat shall be done with this link?\n',
- [('unlink', 'u'), ('skip', 's'), ('edit', 'e'),
- ('more context', 'm'), ('unlink all', 'a')], 'u')
- pywikibot.output(u'')
-
- if choice == 's':
- # skip this link
- return text, False
- elif choice == 'e':
- editor = TextEditor()
- newText = editor.edit(text, jumpIndex=match.start())
- # if user didn't press Cancel
- if newText:
- return newText, True
- else:
- return text, True
- elif choice == 'm':
- # show more context by recursive self-call
- return self.handleNextLink(text, match,
- context=context + 100)
- elif choice == 'a':
- self.options['always'] = True
- new = match.group('label') or match.group('title')
- new += match.group('linktrail')
- return text[:match.start()] + new + text[match.end():], False
-
- def treat(self, page):
+ def treat_page(self):
"""Remove links pointing to the configured page from the given page."""
- self.current_page = page
- try:
- oldText = page.get()
- text = oldText
- curpos = 0
- while curpos < len(text):
- match = self.linkR.search(text, pos=curpos)
- if not match:
- break
- # Make sure that next time around we will not find this same
- # hit.
- curpos = match.start() + 1
- text, jumpToBeginning = self.handleNextLink(text, match)
- if jumpToBeginning:
- curpos = 0
- if oldText == text:
- pywikibot.output(u'No changes necessary.')
+ text = self.current_page.text
+ while True:
+ unlink_callback = self._create_callback()
+ try:
+ text = replace_links(text, unlink_callback,
+ self.pageToUnlink.site)
+ except EditReplacement:
+ new_text = TextEditor().edit(
+ unlink_callback.current_text,
+ jumpIndex=unlink_callback.current_range[0])
+ # if user didn't press Cancel
+ if new_text:
+ text = new_text
+ else:
+ text = unlink_callback.current_text
else:
- pywikibot.showDiff(oldText, text)
- page.text = text
- page.save(self.comment)
- except pywikibot.NoPage:
- pywikibot.output(u"Page %s does not exist?!"
- % page.title(asLink=True))
- except pywikibot.IsRedirectPage:
- pywikibot.output(u"Page %s is a redirect; skipping."
- % page.title(asLink=True))
- except pywikibot.LockedPage:
- pywikibot.output(u"Page %s is locked?!" % page.title(asLink=True))
+ break
+
+ self.put_current(text, summary=self.comment)
def main(*args):
--
To view, visit https://gerrit.wikimedia.org/r/219209
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I7056c773761a5d1673d6e1d0ad5252861ff0d02f
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits