jenkins-bot has submitted this change and it was merged.
Change subject: Add misspelling.py to core
......................................................................
Add misspelling.py to core
Change-Id: Ic4b40cd2203beecb382911cf9bc8d5ce44e129cb
---
A scripts/misspelling.py
1 file changed, 143 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
Alex S.H. Lin: Verified; Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/scripts/misspelling.py b/scripts/misspelling.py
new file mode 100644
index 0000000..90fa226
--- /dev/null
+++ b/scripts/misspelling.py
@@ -0,0 +1,143 @@
+# -*- coding: utf-8 -*-
+"""
+This script works similarly to solve_disambiguation.py. It is supposed to fix
+links that contain common spelling mistakes. This is only possible on wikis
+that have a template for these misspellings.
+
+Command line options:
+
+ -always:XY instead of asking the user what to do, always perform the same
+ action. For example, XY can be "r0", "u" or "2". Be careful with
+ this option, and check the changes made by the bot. Note that
+ some choices for XY don't make sense and will result in a loop,
+ e.g. "l" or "m".
+
+ -start:XY goes through all misspellings in the category on your wiki
+ that is defined (to the bot) as the category containing
+ misspelling pages, starting at XY. If the -start argument is not
+ given, it starts at the beginning.
+
+ -main only check pages in the main namespace, not in the talk,
+ wikipedia, user, etc. namespaces.
+"""
+
+# (C) Daniel Herding, 2007
+# (C) Pywikibot team, 2007-2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import pywikibot
+from pywikibot import i18n, pagegenerators
+from solve_disambiguation import DisambiguationRobot
+
+
+class MisspellingRobot(DisambiguationRobot):
+
+    """Robot that repoints links leading to misspelled page titles.
+
+    The pages to work on are taken from a per-language category or, when no
+    category is configured, from the pages transcluding a per-language
+    misspelling template. The link-fixing workflow itself is inherited from
+    DisambiguationRobot.
+    """
+
+    # Name of the template marking a page as a misspelling, keyed by
+    # language code. None means no template is configured for the language.
+    misspellingTemplate = {
+        'da': None,  # uses simple redirects
+        'de': u'Falschschreibung',
+        'en': None,  # uses simple redirects
+        'hu': None,  # uses simple redirects
+        'nl': None,
+    }
+
+    # Optional: if there is a category, one can use the -start
+    # parameter.
+    misspellingCategory = {
+        # only contains date redirects at the moment
+        'da': u'Omdirigeringer af fejlstavninger',
+        'de': u'Kategorie:Wikipedia:Falschschreibung',
+        'en': u'Redirects from misspellings',
+        'hu': u'Átirányítások hibás névről',
+        'nl': u'Categorie:Wikipedia:Redirect voor spelfout',
+    }
+
+    def __init__(self, always, firstPageTitle, main_only):
+        """Set up the robot with a generator of misspelling pages.
+
+        @param always: choice to apply to every link without asking, or None
+            to prompt the user
+        @param firstPageTitle: title to start at (only honoured when a
+            misspelling category is configured), or None
+        @param main_only: passed through to DisambiguationRobot
+
+        The remaining positional arguments are fixed values handed to
+        DisambiguationRobot.__init__; their meaning is defined there.
+        """
+        super(MisspellingRobot, self).__init__(
+            always, [], True, False,
+            self.createPageGenerator(firstPageTitle), False, main_only)
+
+    def createPageGenerator(self, firstPageTitle):
+        """Return a preloading generator over this wiki's misspelling pages.
+
+        @param firstPageTitle: title to start at; ignored (with a notice)
+            when the pages come from template inclusions
+        @return: a PreloadingGenerator; it yields nothing when neither a
+            category nor a template is configured for the site language
+        """
+        mysite = pywikibot.Site()
+        mylang = mysite.lang
+        # .get() keeps unconfigured languages from raising a bare KeyError.
+        categoryTitle = self.misspellingCategory.get(mylang)
+        templateName = self.misspellingTemplate.get(mylang)
+        if categoryTitle:
+            misspellingCategory = pywikibot.Category(mysite, categoryTitle)
+            generator = pagegenerators.CategorizedPageGenerator(
+                misspellingCategory, recurse=True, start=firstPageTitle)
+        elif templateName:
+            misspellingTemplate = pywikibot.Page(
+                mysite, 'Template:%s' % templateName)
+            generator = pagegenerators.ReferringPageGenerator(
+                misspellingTemplate, onlyTemplateInclusion=True)
+            if firstPageTitle:
+                pywikibot.output(
+                    u'-start parameter unsupported on this wiki because '
+                    u'there is no category for misspellings.')
+        else:
+            # Nothing to iterate over: say so instead of failing on a
+            # missing key or looking up the bogus page "Template:None".
+            pywikibot.output(
+                u'No misspelling category or template is configured for '
+                u'language "%s".' % mylang)
+            generator = iter(())
+        return pagegenerators.PreloadingGenerator(generator)
+
+    # Overrides the DisambiguationRobot method.
+    def findAlternatives(self, disambPage):
+        """Collect the correct spelling(s) for a misspelling page.
+
+        Found titles are appended to self.alternatives.
+
+        @param disambPage: the misspelling page
+        @return: True if the page is a redirect or uses the misspelling
+            template with at least one parameter, False otherwise
+        """
+        if disambPage.isRedirectPage():
+            self.alternatives.append(disambPage.getRedirectTarget().title())
+            return True
+
+        # Use one and the same language key for the check and the lookup.
+        templateName = self.misspellingTemplate.get(disambPage.site.lang)
+        if templateName is None:
+            return False
+
+        for template, params in disambPage.templatesWithParams():
+            # Compare the bare template name for equality; a substring
+            # test against the namespaced title would not identify it.
+            if template.title(withNamespace=False) != templateName:
+                continue
+            if not params:
+                # Template used without parameters: no spelling given.
+                continue
+            # The correct spelling is in the last parameter.
+            correctSpelling = params[-1]
+            # On de.wikipedia, there are some cases where the
+            # misspelling is ambiguous, see for example:
+            # http://de.wikipedia.org/wiki/Buthan
+            for match in self.linkR.finditer(correctSpelling):
+                self.alternatives.append(match.group('title'))
+
+            if not self.alternatives:
+                # There were no links in the parameter, so there is
+                # only one correct spelling.
+                self.alternatives.append(correctSpelling)
+            return True
+        return False
+
+    # Overrides the DisambiguationRobot method.
+    def setSummaryMessage(self, disambPage, new_targets=None, unlink=False,
+                          dn=False):
+        """Set self.comment to the translated edit summary for disambPage.
+
+        new_targets, unlink and dn are accepted for signature compatibility
+        with the overridden method and are currently unused.
+        """
+        # TODO: setSummaryMessage() in solve_disambiguation now has parameters
+        # new_targets and unlink. Make use of these here.
+        self.comment = i18n.twtranslate(self.mysite, 'misspelling-fixing',
+                                        {'page': disambPage.title()})
+
+
+def main():
+    """Parse the command line and run a MisspellingRobot.
+
+    Handles -always:XY, -start[:XY] and -main; any other argument returned
+    by pywikibot.handleArgs() is ignored.
+    """
+    # Choice applied to every link without prompting. None (the default)
+    # means the user is asked each time.
+    fixed_choice = None
+    start_title = None
+    restrict_to_main = False
+
+    for option in pywikibot.handleArgs():
+        if option == '-main':
+            restrict_to_main = True
+        elif option.startswith('-always:'):
+            fixed_choice = option[8:]
+        elif option.startswith('-start'):
+            if len(option) > 6:
+                # Drop "-start" plus the single separator character.
+                start_title = option[7:]
+            else:
+                start_title = pywikibot.input(
+                    u'At which page do you want to start?')
+
+    MisspellingRobot(fixed_choice, start_title, restrict_to_main).run()
+
+
+# Script entry point: run main() only when this file is executed directly.
+# The finally clause makes sure pywikibot.stopme() is called even when
+# main() raises or is interrupted.
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
--
To view, visit https://gerrit.wikimedia.org/r/119292
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic4b40cd2203beecb382911cf9bc8d5ce44e129cb
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Alex S.H. Lin <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits