jenkins-bot has submitted this change and it was merged.

Change subject: Add misspelling.py to core
......................................................................


Add misspelling.py to core

Change-Id: Ic4b40cd2203beecb382911cf9bc8d5ce44e129cb
---
A scripts/misspelling.py
1 file changed, 143 insertions(+), 0 deletions(-)

Approvals:
  Merlijn van Deen: Looks good to me, approved
  Alex S.H. Lin: Verified; Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/scripts/misspelling.py b/scripts/misspelling.py
new file mode 100644
index 0000000..90fa226
--- /dev/null
+++ b/scripts/misspelling.py
@@ -0,0 +1,143 @@
+# -*- coding: utf-8  -*-
+"""
+This script works similarly to solve_disambiguation.py. It is supposed to fix
+links that contain common spelling mistakes. This is only possible on wikis
+that have a template for these misspellings.
+
+Command line options:
+
+   -always:XY  instead of asking the user what to do, always perform the same
+               action. For example, XY can be "r0", "u" or "2". Be careful with
+               this option, and check the changes made by the bot. Note that
+               some choices for XY don't make sense and will result in a loop,
+               e.g. "l" or "m".
+
+   -start:XY   goes through all misspellings in the category on your wiki
+               that is defined (to the bot) as the category containing
+               misspelling pages, starting at XY. If the -start argument is not
+               given, it starts at the beginning.
+
+   -main       only check pages in the main namespace, not in the talk,
+               wikipedia, user, etc. namespaces.
+"""
+
+# (C) Daniel Herding, 2007
+# (C) Pywikibot team, 2007-2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import pywikibot
+from pywikibot import i18n, pagegenerators
+from solve_disambiguation import DisambiguationRobot
+
+
+class MisspellingRobot(DisambiguationRobot):
+
+    misspellingTemplate = {
+        'da': None,                     # uses simple redirects
+        'de': u'Falschschreibung',
+        'en': None,                     # uses simple redirects
+        'hu': None,                     # uses simple redirects
+        'nl': None,
+    }
+
+    # Optional: if there is a category, one can use the -start
+    # parameter.
+    misspellingCategory = {
+        'da': u'Omdirigeringer af fejlstavninger',  # only contains date redirects at the moment
+        'de': u'Kategorie:Wikipedia:Falschschreibung',
+        'en': u'Redirects from misspellings',
+        'hu': u'Átirányítások hibás névről',
+        'nl': u'Categorie:Wikipedia:Redirect voor spelfout',
+    }
+
+    def __init__(self, always, firstPageTitle, main_only):
+        super(MisspellingRobot, self).__init__(
+            always, [], True, False,
+            self.createPageGenerator(firstPageTitle), False, main_only)
+
+    def createPageGenerator(self, firstPageTitle):
+        mysite = pywikibot.Site()
+        mylang = mysite.lang
+        if mylang in self.misspellingCategory:
+            misspellingCategoryTitle = self.misspellingCategory[mylang]
+            misspellingCategory = pywikibot.Category(mysite,
+                                                     misspellingCategoryTitle)
+            generator = pagegenerators.CategorizedPageGenerator(
+                misspellingCategory, recurse=True, start=firstPageTitle)
+        else:
+            misspellingTemplateName = 'Template:%s' \
+                                      % self.misspellingTemplate[mylang]
+            misspellingTemplate = pywikibot.Page(mysite,
+                                                 misspellingTemplateName)
+            generator = pagegenerators.ReferringPageGenerator(
+                misspellingTemplate, onlyTemplateInclusion=True)
+            if firstPageTitle:
+                pywikibot.output(
+                    u'-start parameter unsupported on this wiki because there '
+                    u'is no category for misspellings.')
+        preloadingGen = pagegenerators.PreloadingGenerator(generator)
+        return preloadingGen
+
+    # Overrides the DisambiguationRobot method.
+    def findAlternatives(self, disambPage):
+        if disambPage.isRedirectPage():
+            self.alternatives.append(disambPage.getRedirectTarget().title())
+            return True
+        elif self.misspellingTemplate[disambPage.site.lang] is not None:
+            for template, params in disambPage.templatesWithParams():
+                if template.title() in self.misspellingTemplate[self.mylang]:
+                    # The correct spelling is in the last parameter.
+                    correctSpelling = params[-1]
+                    # On de.wikipedia, there are some cases where the
+                    # misspelling is ambiguous, see for example:
+                    # http://de.wikipedia.org/wiki/Buthan
+                    for match in self.linkR.finditer(correctSpelling):
+                        self.alternatives.append(match.group('title'))
+
+                    if not self.alternatives:
+                        # There were no links in the parameter, so there is
+                        # only one correct spelling.
+                        self.alternatives.append(correctSpelling)
+                    return True
+
+    # Overrides the DisambiguationRobot method.
+    def setSummaryMessage(self, disambPage, new_targets=[], unlink=False,
+                          dn=False):
+        # TODO: setSummaryMessage() in solve_disambiguation now has parameters
+        # new_targets and unlink. Make use of these here.
+        self.comment = i18n.twtranslate(self.mysite, 'misspelling-fixing',
+                                        {'page': disambPage.title()})
+
+
+def main():
+    # the option that's always selected when the bot wonders what to do with
+    # a link. If it's None, the user is prompted (default behaviour).
+    always = None
+    main_only = False
+    firstPageTitle = None
+
+    for arg in pywikibot.handleArgs():
+        if arg.startswith('-always:'):
+            always = arg[8:]
+        elif arg.startswith('-start'):
+            if len(arg) == 6:
+                firstPageTitle = pywikibot.input(
+                    u'At which page do you want to start?')
+            else:
+                firstPageTitle = arg[7:]
+        elif arg == '-main':
+            main_only = True
+
+    bot = MisspellingRobot(always, firstPageTitle, main_only)
+    bot.run()
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        pywikibot.stopme()

-- 
To view, visit https://gerrit.wikimedia.org/r/119292
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic4b40cd2203beecb382911cf9bc8d5ce44e129cb
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Alex S.H. Lin <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to