John Vandenberg has submitted this change and it was merged.
Change subject: [FEAT] disambredir: Use replace_links
......................................................................
[FEAT] disambredir: Use replace_links
This makes the disambredir script use replace_links, which was added in
18e6c9b1.
It also adds a generic class which asks the user how they want to replace a
link. It supports all of the replacement modes that replace_links supports.
Change-Id: I3e24f3ecc54b2cb8c6557cac5f5bd23d85709342
---
M pywikibot/bot.py
M scripts/disambredir.py
2 files changed, 157 insertions(+), 90 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
XZise: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index 6455325..9351f5d 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -680,6 +680,154 @@
force=force)
+class InteractiveReplace(object):
+
+ """
+ A callback class for textlib's replace_links.
+
+ It shows various options which can be switched on and off:
+ * allow_skip_link = True (skip the current link)
+ * allow_unlink = True (unlink)
+ * allow_replace = True (just replace target, keep section and label)
+ * allow_replace_section = False (replace target and section, keep label)
+ * allow_replace_label = False (replace target and label, keep section)
+ * allow_replace_all = False (replace target, section and label)
+ (The boolean values are the default values)
+
+ It has also a 'context' attribute which must be a non-negative integer. If
+ it is greater 0 it shows that many characters before and after the link in
+ question.
+
+ Subclasses may overwrite build_choices and handle_answer to add custom made
+ answers.
+ """
+
+ def __init__(self, old_link, new_link, default=None, automatic_quit=True,
+ yes_shortcut=True):
+ """
+ Constructor.
+
+ @param old_link: The old link which is searched. The label and section
+ are ignored.
+ @type old_link: Link or Page
+ @param new_link: The new link with which it should be replaced.
+ Depending on the replacement mode it'll use this link's label and
+ section.
+ @type new_link: Link or Page
+ @param default: The default answer as the shortcut
+ @type default: None or str
+ @param automatic_quit: Add an option to quit and raise a
+ QuitKeyboardException.
+ @type automatic_quit: bool
+ @param yes_shortcut: Make the first replacement option accessible via
+ 'y' shortcut (does not apply to unlink).
+ @type yes_shortcut: bool
+ """
+ if isinstance(old_link, pywikibot.Page):
+ self._old = old_link._link
+ else:
+ self._old = old_link
+ if isinstance(new_link, pywikibot.Page):
+ self._new = new_link._link
+ else:
+ self._new = new_link
+ self._default = default
+ self._quit = automatic_quit
+ self._yes = yes_shortcut
+ self.context = 30
+ self.allow_skip_link = True
+ self.allow_unlink = True
+ self.allow_replace = True
+ self.allow_replace_section = False
+ self.allow_replace_label = False
+ self.allow_replace_all = False
+
+ def build_choices(self):
+ """
+ Return the choices and what the shortcut 'y' actually means.
+
+ The shortcut alias for 'y' may be either what yes_shortcut is in the
+ constructor negated or the actual shortcut used. So if it didn't use 'y'
+ at all it's a boolean (True if there are replacements and it was
+ disabled, False if there are no replacements and it is enabled) and
+ otherwise it's one character.
+ """
+ choices = []
+ if self.allow_skip_link:
+ choices += [('Do not change', 'n')]
+ if self.allow_unlink:
+ choices += [('Unlink', 'u')]
+ yes_used = not self._yes
+ if self.allow_replace:
+ choices += [('Change link target', 't' if yes_used else 'y')]
+ yes_used = 't'
+ if self.allow_replace_section:
+ choices += [('Change link target and section', 's' if yes_used else 'y')]
+ yes_used = 's'
+ if self.allow_replace_label:
+ choices += [('Change link target and label', 'l' if yes_used else 'y')]
+ yes_used = 'l'
+ if self.allow_replace_all:
+ choices += [('Change complete link', 'c' if yes_used else 'y')]
+ yes_used = 'c'
+ # 'y' was disabled in the constructor so return False as it was actually
+ # not used
+ if yes_used is True:
+ yes_used = False
+ return choices, yes_used
+
+ def handle_answer(self, choice, link):
+ """Return the result for replace_links."""
+ if choice == 'n':
+ return None
+ elif choice == 'u':
+ return False
+ elif choice == 't':
+ return self._new.canonical_title()
+ elif choice == 's':
+ return pywikibot.Link.create_separated(
+ self._new.canonical_title(), self._new.site,
+ section=self._new.section, label=link.anchor)
+ elif choice == 'l':
+ return pywikibot.Link.create_separated(
+ self._new.canonical_title(), self._new.site,
+ section=link.section, label=self._new.anchor)
+ else:
+ assert choice == 'c', 'Invalid choice {0}'.format(choice)
+ return self._new
+
+ def __call__(self, link, text, groups, rng):
+ """Ask user how the selected link should be replaced."""
+ if self._old == link:
+ if self.context > 0:
+ # at the beginning of the link, start red color.
+ # at the end of the link, reset the color to default
+ pywikibot.output(text[max(0, rng[0] - self.context): rng[0]] +
+ '\03{lightred}' + text[rng[0]: rng[1]] +
+ '\03{default}' + text[rng[1]: rng[1] + self.context])
+ question = ('Should the link target to '
+ '\03{{lightpurple}}{0}\03{{default}}?')
+ else:
+ question = ('Should the link \03{{lightred}}{1}\03{{default}} '
+ 'target to \03{{lightpurple}}{0}\03{{default}}?')
+ choices, yes_alias = self.build_choices()
+ if yes_alias and self._default == yes_alias:
+ default = 'y'
+ else:
+ default = self._default
+
+ choice = pywikibot.input_choice(
+ question.format(self._new.canonical_title(),
+ self._old.canonical_title()),
+ choices, default=default, automatic_quit=self._quit)
+ if yes_alias and choice == 'y':
+ choice = yes_alias
+
+ return self.handle_answer(choice, link)
+ else:
+ return None
+
+
# Command line parsing and help
def calledModuleName():
"""Return the name of the module calling this function.
diff --git a/scripts/disambredir.py b/scripts/disambredir.py
index a7246a2..2bc2d8c 100755
--- a/scripts/disambredir.py
+++ b/scripts/disambredir.py
@@ -19,10 +19,10 @@
__version__ = '$Id$'
#
-import re
import pywikibot
-from pywikibot import i18n, pagegenerators
-from pywikibot.tools import first_lower, first_upper as firstcap
+
+from pywikibot import i18n, textlib, pagegenerators
+from pywikibot.bot import InteractiveReplace
msg = {
'ar': u'تغيير التحويلات في صفحة توضيح',
@@ -40,92 +40,6 @@
}
-def treat(text, linkedPage, targetPage):
- """Based on the method of the same name in solve_disambiguation.py."""
- # make a backup of the original text so we can show the changes later
- mysite = pywikibot.Site()
- linktrail = mysite.linktrail()
- linkR = re.compile(
- r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
- % linktrail)
- curpos = 0
- # This loop will run until we have finished the current page
- while True:
- m = linkR.search(text, pos=curpos)
- if not m:
- break
- # Make sure that next time around we will not find this same hit.
- curpos = m.start() + 1
- # ignore interwiki links and links to sections of the same page
- if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
- continue
- else:
- actualLinkPage = pywikibot.Page(mysite, m.group('title'))
- # Check whether the link found is to page.
- if actualLinkPage != linkedPage:
- continue
-
- # how many bytes should be displayed around the current link
- context = 30
- # at the beginning of the link, start red color.
- # at the end of the link, reset the color to default
- pywikibot.output(text[max(0, m.start() - context): m.start()] +
- '\03{lightred}' + text[m.start(): m.end()] +
- '\03{default}' + text[m.end(): m.end() + context])
- choice = pywikibot.input_choice(
- 'What should be done with the link?',
- (('Do not change', 'n'),
- ('Change link to \03{lightpurple}%s\03{default}'
- % targetPage.title(), 'y'),
- ('Change and replace text', 'r'), ('Unlink', 'u')),
- default='n', automatic_quit=False)
-
- if choice == 'n':
- continue
-
- # The link looks like this:
- # [[page_title|link_text]]trailing_chars
- page_title = m.group('title')
- link_text = m.group('label')
- if not link_text:
- # or like this: [[page_title]]trailing_chars
- link_text = page_title
- if m.group('section') is None:
- section = ''
- else:
- section = m.group('section')
- trailing_chars = m.group('linktrail')
- if trailing_chars:
- link_text += trailing_chars
-
- if choice == 'u':
- # unlink - we remove the section if there's any
- text = text[:m.start()] + link_text + text[m.end():]
- continue
-
- if link_text[0].isupper():
- new_page_title = targetPage.title()
- else:
- new_page_title = first_lower(targetPage.title())
- if choice == 'r' and trailing_chars:
- newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
- elif choice == 'r' or (new_page_title == link_text and not section):
- newlink = "[[%s]]" % new_page_title
- # check if we can create a link with trailing characters instead of a
- # pipelink
- elif len(new_page_title) <= len(link_text) and \
- firstcap(link_text[:len(new_page_title)]) == \
- firstcap(new_page_title) and \
- re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
- newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
- link_text[len(new_page_title):])
- else:
- newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
- text = text[:m.start()] + newlink + text[m.end():]
- continue
- return text
-
-
def workon(page, links):
"""Execute treat for the given page which is linking to the given links."""
text = page.get()
@@ -138,7 +52,12 @@
target = page2.getRedirectTarget()
except (pywikibot.Error, pywikibot.SectionError):
continue
- text = treat(text, page2, target)
+ replace_callback = InteractiveReplace(
+ page2, target, default='n', automatic_quit=False, yes_shortcut=True)
+ replace_callback.allow_replace_label = True
+ # TODO: Work on all links at the same time (would mean that the user
+ # doesn't get them ordered like in links but how they appear in the page)
+ text = textlib.replace_links(text, replace_callback, page.site)
if text != page.get():
comment = i18n.translate(page.site, msg, fallback=True)
page.put(text, comment)
--
To view, visit https://gerrit.wikimedia.org/r/217824
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3e24f3ecc54b2cb8c6557cac5f5bd23d85709342
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits