Vldandrew has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/102912


Change subject: Port disambredir.py to core
......................................................................

Port disambredir.py to core

Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c
---
A scripts/.ropeproject/config.py
A scripts/.ropeproject/globalnames
A scripts/.ropeproject/history
A scripts/.ropeproject/objectdb
A scripts/disambredir.py
5 files changed, 268 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/12/102912/1

diff --git a/scripts/.ropeproject/config.py b/scripts/.ropeproject/config.py
new file mode 100644
index 0000000..ffebcd4
--- /dev/null
+++ b/scripts/.ropeproject/config.py
@@ -0,0 +1,85 @@
+# The default ``config.py``
+
+
+def set_prefs(prefs):
+    """Fill ``prefs`` with the rope settings for this project.
+
+    Called before the project is opened.  Each ``(name, value)`` pair in the
+    table below is stored as ``prefs[name] = value``, in the order listed.
+    """
+    settings = (
+        # Files and folders rope should ignore.  Changes to ignored resources
+        # are not added to the history or to VCSs, and they are not returned
+        # by `Project.get_files()`.  ``?`` and ``*`` match every character
+        # except slashes:
+        #   '*.pyc'      matches 'test.pyc' and 'pkg/test.pyc'
+        #   'mod*.pyc'   matches 'test/mod1.pyc' but not 'mod/1.pyc'
+        #   '.svn'       matches 'pkg/.svn' and all of its children
+        #   'build/*.o'  matches 'build/lib.o' but not 'build/sub/lib.o'
+        #   'build//*.o' matches 'build/lib.o' and 'build/sub/lib.o'
+        ('ignored_resources', ['*.pyc', '*~', '.ropeproject',
+                               '.hg', '.svn', '_svn', '.git']),
+
+        # Whether rope saves object information, and whether it is compressed.
+        ('save_objectdb', True),
+        ('compress_objectdb', False),
+
+        # If `True`, rope analyzes each module when it is being saved.
+        ('automatic_soa', True),
+        # The depth of calls to follow in static object analysis.
+        ('soa_followed_calls', 0),
+
+        # If `False`, "dynamic object analysis" is turned off when running
+        # modules or unit tests, which makes them much faster.
+        ('perform_doa', True),
+
+        # Rope can check the validity of its object DB when running.
+        ('validate_objectdb', True),
+
+        # Number of undos to hold.
+        ('max_history_items', 32),
+
+        # Whether history is saved across sessions, and whether compressed.
+        ('save_history', True),
+        ('compress_history', False),
+
+        # Number of spaces used for indenting.  :PEP:`8` recommends 4, and
+        # most of rope's unit-tests use 4 as well.
+        ('indent_size', 4),
+
+        # Builtin and c-extension modules that rope may import and inspect.
+        ('extension_modules', []),
+
+        # Add all standard c-extensions to the extension_modules list.
+        ('import_dynload_stdmods', True),
+
+        # If `True`, modules with syntax errors are considered to be empty.
+        # With `False` (the default) syntax errors raise
+        # `rope.base.exceptions.ModuleSyntaxError`.
+        ('ignore_syntax_errors', False),
+
+        # If `True`, rope ignores unresolvable imports.  Otherwise, they
+        # appear in the importing namespace.
+        ('ignore_bad_imports', False),
+    )
+    for name, value in settings:
+        prefs[name] = value
+
+    # Optional settings, left disabled:
+    #
+    # Which files count as python files.  Useful when the project contains
+    # scripts; by default only files ending with ``.py`` qualify.
+    #prefs['python_files'] = ['*.py']
+    #
+    # Custom source folders.  By default rope searches the project for
+    # folders that should be searched for modules, and usually guesses
+    # right; add paths only if there are problems.  Folders are relative to
+    # the project root and use '/' as separator on every platform, for
+    # instance 'src/my_source_folder'.
+    #prefs.add('source_folders', 'src')
+    #
+    # Extend the python path used for looking up modules.
+    #prefs.add('python_path', '~/python/')
+
+
+def project_opened(project):
+    """Hook called after the project has been opened; does nothing."""
+    # Project-specific start-up actions can be added here.
+    return None
diff --git a/scripts/.ropeproject/globalnames b/scripts/.ropeproject/globalnames
new file mode 100644
index 0000000..b560d8d
--- /dev/null
+++ b/scripts/.ropeproject/globalnames
@@ -0,0 +1 @@
+€}qUdisambredir]q(UmainqUfirstcapqUworkonqUtreatqUmsgqes.
\ No newline at end of file
diff --git a/scripts/.ropeproject/history b/scripts/.ropeproject/history
new file mode 100644
index 0000000..fcd9c96
--- /dev/null
+++ b/scripts/.ropeproject/history
@@ -0,0 +1 @@
+€]q(]q]qe.
\ No newline at end of file
diff --git a/scripts/.ropeproject/objectdb b/scripts/.ropeproject/objectdb
new file mode 100644
index 0000000..29c40cd
--- /dev/null
+++ b/scripts/.ropeproject/objectdb
@@ -0,0 +1 @@
+€}q.
\ No newline at end of file
diff --git a/scripts/disambredir.py b/scripts/disambredir.py
new file mode 100644
index 0000000..eed64a0
--- /dev/null
+++ b/scripts/disambredir.py
@@ -0,0 +1,180 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+"""
+Goes through the disambiguation pages, checks their links, and asks for
+each link that goes to a redirect page whether it should be replaced.
+"""
+#
+# (C) André Engels and others, 2006-2009
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id$'
+#
+import pywikibot
+from pywikibot import pagegenerators
+import sys, re
+from pywikibot import catlib
+
+# Edit summaries for saving a changed disambiguation page, keyed by language
+# code; workon() picks one with pywikibot.translate().
+msg = {
+    'ar': u'تغيير التحويلات في صفحة توضيح',
+    'be-x-old': u'Замена перанакіраваньняў на старонку неадназначнасьцяў',
+    'en': u'Changing redirects on a disambiguation page',
+    'he': u'משנה קישורים להפניות בדף פירושונים',
+    # \u200c is the zero-width non-joiner inside the last word; it is spelled
+    # as an escape so that the invisible character cannot get lost.
+    'fa': u'اصلاح تغییرمسیرها در یک صفحه ابهام\u200cزدایی',
+    'ja': u'ロボットによる: 曖昧さ回避ページのリダイレクト修正',
+    'nl': u'Verandering van redirects op een doorverwijspagina',
+    'pl': u'Zmiana przekierowań na stronie ujednoznaczającej',
+    'pt': u'Arrumando redirects na página de desambiguação',
+    'ru': u'Изменение перенаправлений на странице неоднозначности',
+    'uk': u'Зміна перенаправлень на сторінці багатозначності',
+    'zh': u'機器人: 修改消歧義頁中的重定向連結',
+}
+
+def firstcap(string):
+    """Return ``string`` with its first character upper-cased.
+
+    The remainder of the string is left untouched.  An empty string is
+    returned unchanged instead of raising ``IndexError``.
+    """
+    # Slicing (unlike indexing) is safe on an empty string.
+    return string[:1].upper() + string[1:]
+
+def treat(text, linkedPage, targetPage):
+    """
+    Interactively rewrite links to linkedPage so they point to targetPage.
+
+    Based on the method of the same name in solve_disambiguation.py.
+
+    Every wikilink in text whose title yields a page equal to linkedPage is
+    shown to the user, who chooses to keep it (n), retarget it (y), retarget
+    it and drop the label (r), or unlink it (u).
+
+    Relies on the module globals mysite, linktrail and page, which main()
+    sets before this function is reached.
+
+    @param text: wikitext to work on
+    @param linkedPage: page the links currently point to (workon() passes
+        the redirect page)
+    @param targetPage: page the links should point to (workon() passes the
+        redirect target)
+    @return: the wikitext, with the accepted replacements applied
+    """
+    linkR = re.compile(
+        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
+        r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
+    # Compiled once; used below to test whether a suffix is pure link trail.
+    trailR = re.compile(linktrail)
+    curpos = 0
+    # This loop will run until we have finished the current page
+    while True:
+        m = linkR.search(text, pos=curpos)
+        if not m:
+            break
+        # Make sure that next time around we will not find this same hit.
+        curpos = m.start() + 1
+        # ignore interwiki links and links to sections of the same page
+        if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
+            continue
+        actualLinkPage = pywikibot.Page(page.site(), m.group('title'))
+        # Check whether the link found is to linkedPage.
+        if actualLinkPage != linkedPage:
+            continue
+
+        # how many characters should be displayed around the current link
+        context = 30
+        # at the beginning of the link, start red color.
+        # at the end of the link, reset the color to default
+        pywikibot.output(text[max(0, m.start() - context):m.start()] +
+                         '\03{lightred}' + text[m.start():m.end()] +
+                         '\03{default}' + text[m.end():m.end() + context])
+        while True:
+            choice = pywikibot.input(
+                u"Option (N=do not change, y=change link to "
+                u"\03{lightpurple}%s\03{default}, r=change and replace text, "
+                u"u=unlink)" % targetPage.title())
+            try:
+                choice = choice[0]
+            except (IndexError, TypeError):
+                # Empty (or missing) answer: fall back to the default, N.
+                choice = 'N'
+            if choice in 'nNyYrRuU':
+                break
+        if choice in "nN":
+            continue
+
+        # The link looks like this:
+        # [[page_title|link_text]]trailing_chars
+        page_title = m.group('title')
+        link_text = m.group('label')
+        if not link_text:
+            # or like this: [[page_title]]trailing_chars
+            link_text = page_title
+        if m.group('section') is None:
+            section = ''
+        else:
+            section = m.group('section')
+        trailing_chars = m.group('linktrail')
+        if trailing_chars:
+            link_text += trailing_chars
+
+        if choice in "uU":
+            # unlink - we remove the section if there's any
+            text = text[:m.start()] + link_text + text[m.end():]
+            continue
+        replaceit = choice in "rR"
+
+        # Keep the capitalisation of the first letter as the link text had it.
+        if link_text[0].isupper():
+            new_page_title = targetPage.title()
+        else:
+            new_page_title = targetPage.title()[0].lower() + \
+                             targetPage.title()[1:]
+        if replaceit and trailing_chars:
+            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
+        elif replaceit or (new_page_title == link_text and not section):
+            newlink = "[[%s]]" % new_page_title
+        # check if we can create a link with trailing characters instead of a
+        # pipelink
+        elif (len(new_page_title) <= len(link_text) and
+              firstcap(link_text[:len(new_page_title)]) ==
+              firstcap(new_page_title) and
+              trailR.sub('', link_text[len(new_page_title):]) == '' and
+              not section):
+            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
+                                    link_text[len(new_page_title):])
+        else:
+            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
+        text = text[:m.start()] + newlink + text[m.end():]
+    return text
+
+def workon(page, links):
+    """Offer a fix for each redirect linked from page; save if text changed.
+
+    For every entry of links whose getRedirectTarget() succeeds, treat() is
+    run on the page text.  The page is written back, with the edit summary
+    taken from msg, only when the resulting text differs from page.get().
+
+    @param page: the page whose text is edited
+    @param links: the pages linked from page
+    """
+    text = page.get()
+    # Announce the page being processed, with its title shown in purple.
+    pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+                     % page.title())
+    for candidate in links:
+        try:
+            redirect_target = candidate.getRedirectTarget()
+        except (pywikibot.Error, pywikibot.SectionError):
+            # No target could be obtained (presumably the linked page is not
+            # a redirect), so there is nothing to offer for this link.
+            continue
+        text = treat(text, candidate, redirect_target)
+    if text == page.get():
+        # Nothing was changed; do not save.
+        return
+    summary = pywikibot.translate(mysite, msg)
+    page.put(text, summary)
+
+def main():
+    """Walk the disambiguation category and process each page in it.
+
+    The command line arguments, joined by spaces, give the title to start
+    from ("!" when there are none).  Redirect pages and pages outside
+    namespace 0 are skipped.  Pages are collected in batches so that their
+    linked pages can be loaded together before workon() runs on each page.
+
+    Sets the module globals mysite, linktrail and page, which treat() and
+    workon() read.
+
+    @raise pywikibot.NoPage: re-raised, after printing a message, when it is
+        raised while setting up the disambiguation category generator
+    """
+    global mysite, linktrail, page
+    args = list(pywikibot.handleArgs())
+    if args:
+        start = " ".join(args)
+    else:
+        start = "!"
+    mysite = pywikibot.getSite()
+    linktrail = mysite.linktrail()
+    try:
+        generator = pagegenerators.CategorizedPageGenerator(
+            mysite.disambcategory(), start=start)
+    except pywikibot.NoPage:
+        pywikibot.output(
+            "The bot does not know the disambiguation category for your wiki.")
+        raise
+    # only work on articles
+    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
+    generator = pagegenerators.PreloadingGenerator(generator)
+    pagestodo = []
+    pagestoload = []
+    for page in generator:
+        if page.isRedirectPage():
+            continue
+        # The links are iterated twice (here for preloading and again in
+        # workon()), so materialise them in case linkedPages() hands back a
+        # one-shot iterator.
+        linked = list(page.linkedPages())
+        pagestodo.append((page, linked))
+        pagestoload += linked
+        if len(pagestoload) > 49:
+            pywikibot.getall(mysite, pagestoload)
+            # Rebinds the global page on purpose: treat() reads it.
+            for page, links in pagestodo:
+                workon(page, links)
+            pagestoload = []
+            pagestodo = []
+    # Process the last, partially filled batch; without this the pages
+    # queued after the final full batch would never be worked on.
+    if pagestodo:
+        if pagestoload:
+            pywikibot.getall(mysite, pagestoload)
+        for page, links in pagestodo:
+            workon(page, links)
+
+# Run the bot only when this file is executed as a script.
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        # Runs whether main() returns normally or raises.
+        pywikibot.stopme()
+

-- 
To view, visit https://gerrit.wikimedia.org/r/102912
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Vldandrew <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to