Vldandrew has uploaded a new change for review.
https://gerrit.wikimedia.org/r/102912
Change subject: Port disambredir.py to core
......................................................................
Port disambredir.py to core
Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c
---
A scripts/.ropeproject/config.py
A scripts/.ropeproject/globalnames
A scripts/.ropeproject/history
A scripts/.ropeproject/objectdb
A scripts/disambredir.py
5 files changed, 268 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/12/102912/1
diff --git a/scripts/.ropeproject/config.py b/scripts/.ropeproject/config.py
new file mode 100644
index 0000000..ffebcd4
--- /dev/null
+++ b/scripts/.ropeproject/config.py
@@ -0,0 +1,85 @@
+# The default ``config.py``
+
+
+def set_prefs(prefs):
+ """This function is called before opening the project.
+
+ Fills ``prefs`` with the rope project settings below, using plain item
+ assignment (``prefs[key] = value``). Nothing is returned.
+ """
+ # NOTE(review): '.ropeproject' is itself listed in 'ignored_resources'
+ # below, yet this file lives in scripts/.ropeproject/ - presumably IDE
+ # metadata that was committed by accident; confirm before merging.
+
+ # Specify which files and folders to ignore in the project.
+ # Changes to ignored resources are not added to the history and
+ # VCSs. Also they are not returned in `Project.get_files()`.
+ # Note that ``?`` and ``*`` match all characters but slashes.
+ # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc'
+ # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc'
+ # '.svn': matches 'pkg/.svn' and all of its children
+ # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o'
+ # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o'
+ prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject',
+ '.hg', '.svn', '_svn', '.git']
+
+ # Specifies which files should be considered python files. It is
+ # useful when you have scripts inside your project. Only files
+ # ending with ``.py`` are considered to be python files by
+ # default.
+ #prefs['python_files'] = ['*.py']
+
+ # Custom source folders: By default rope searches the project
+ # for finding source folders (folders that should be searched
+ # for finding modules). You can add paths to that list. Note
+ # that rope guesses project source folders correctly most of the
+ # time; use this if you have any problems.
+ # The folders should be relative to project root and use '/' for
+ # separating folders regardless of the platform rope is running on.
+ # 'src/my_source_folder' for instance.
+ #prefs.add('source_folders', 'src')
+
+ # You can extend python path for looking up modules
+ #prefs.add('python_path', '~/python/')
+
+ # Should rope save object information or not.
+ prefs['save_objectdb'] = True
+ prefs['compress_objectdb'] = False
+
+ # If `True`, rope analyzes each module when it is being saved.
+ prefs['automatic_soa'] = True
+ # The depth of calls to follow in static object analysis
+ prefs['soa_followed_calls'] = 0
+
+ # If `False` when running modules or unit tests "dynamic object
+ # analysis" is turned off. This makes them much faster.
+ prefs['perform_doa'] = True
+
+ # Rope can check the validity of its object DB when running.
+ prefs['validate_objectdb'] = True
+
+ # How many undos to hold?
+ prefs['max_history_items'] = 32
+
+ # Shows whether to save history across sessions.
+ prefs['save_history'] = True
+ prefs['compress_history'] = False
+
+ # Set the number spaces used for indenting. According to
+ # :PEP:`8`, it is best to use 4 spaces. Since most of rope's
+ # unit-tests use 4 spaces it is more reliable, too.
+ prefs['indent_size'] = 4
+
+ # Builtin and c-extension modules that are allowed to be imported
+ # and inspected by rope.
+ prefs['extension_modules'] = []
+
+ # Add all standard c-extensions to extension_modules list.
+ prefs['import_dynload_stdmods'] = True
+
+ # If `True` modules with syntax errors are considered to be empty.
+ # The default value is `False`; When `False` syntax errors raise
+ # `rope.base.exceptions.ModuleSyntaxError` exception.
+ prefs['ignore_syntax_errors'] = False
+
+ # If `True`, rope ignores unresolvable imports. Otherwise, they
+ # appear in the importing namespace.
+ prefs['ignore_bad_imports'] = False
+
+
+def project_opened(project):
+ """This function is called after opening the project.
+
+ The hook is intentionally empty: ``project`` is accepted but not used.
+ """
+ # Do whatever you like here!
diff --git a/scripts/.ropeproject/globalnames b/scripts/.ropeproject/globalnames
new file mode 100644
index 0000000..b560d8d
--- /dev/null
+++ b/scripts/.ropeproject/globalnames
@@ -0,0 +1 @@
+}qUdisambredir]q(UmainqUfirstcapqUworkonqUtreatqUmsgqes.
\ No newline at end of file
diff --git a/scripts/.ropeproject/history b/scripts/.ropeproject/history
new file mode 100644
index 0000000..fcd9c96
--- /dev/null
+++ b/scripts/.ropeproject/history
@@ -0,0 +1 @@
+]q(]q]qe.
\ No newline at end of file
diff --git a/scripts/.ropeproject/objectdb b/scripts/.ropeproject/objectdb
new file mode 100644
index 0000000..29c40cd
--- /dev/null
+++ b/scripts/.ropeproject/objectdb
@@ -0,0 +1 @@
+}q.
\ No newline at end of file
diff --git a/scripts/disambredir.py b/scripts/disambredir.py
new file mode 100644
index 0000000..eed64a0
--- /dev/null
+++ b/scripts/disambredir.py
@@ -0,0 +1,180 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Goes through the disambiguation pages, checks their links, and asks for
+each link that goes to a redirect page whether it should be replaced.
+"""
+#
+# (C) André Engels and others, 2006-2009
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id$'
+#
+import pywikibot
+from pywikibot import pagegenerators
+import sys, re
+from pywikibot import catlib
+
+# Edit summaries keyed by language code; workon() selects one through
+# pywikibot.translate(mysite, msg).
+# The non-ASCII entries were restored from mis-decoded UTF-8 (mojibake) and
+# from literals that the mail archive had broken across several lines.
+msg = {
+    'ar': u'تغيير التحويلات في صفحة توضيح',
+    'be-x-old': u'Замена перанакіраваньняў на старонку неадназначнасьцяў',
+    'en': u'Changing redirects on a disambiguation page',
+    'he': u'משנה קישורים להפניות בדף פירושונים',
+    'fa': u'اصلاح تغییرمسیرها در یک صفحه ابهام‌زدایی',
+    'ja': u'ロボットによる: 曖昧さ回避ページのリダイレクト修正',
+    'nl': u'Verandering van redirects op een doorverwijspagina',
+    'pl': u'Zmiana przekierowań na stronie ujednoznaczającej',
+    'pt': u'Arrumando redirects na página de desambiguação',
+    'ru': u'Изменение перенаправлений на странице неоднозначности',
+    'uk': u'Зміна перенаправлень на сторінці багатозначності',
+    'zh': u'機器人: 修改消歧義頁中的重定向連結',
+}
+
+def firstcap(string):
+    """Return ``string`` with its first character upper-cased.
+
+    The remainder of the string is left unchanged. An empty string is
+    returned as is instead of raising IndexError.
+    """
+    # Slicing rather than indexing keeps this safe for empty input.
+    return string[:1].upper() + string[1:]
+
+def treat(text, linkedPage, targetPage):
+    """Interactively retarget the links to ``linkedPage`` found in ``text``.
+
+    Based on the method of the same name in solve_disambiguation.py.
+
+    Each ``[[...]]`` link in ``text`` that resolves to ``linkedPage`` is
+    printed with some context. The user then chooses to leave it alone,
+    point it at ``targetPage`` (keeping the label), point it at
+    ``targetPage`` and replace the label, or unlink it.
+
+    Relies on the module globals ``linktrail``, ``mysite`` and ``page``,
+    which main() assigns.
+
+    text: wikitext to work on.
+    linkedPage: page whose links should be changed.
+    targetPage: page the links should point to instead.
+    Returns the (possibly modified) wikitext.
+    """
+    # NOTE(review): page.site() and mysite.isInterwikiLink() are kept exactly
+    # as submitted; confirm both exist with these signatures in core.
+    linkR = re.compile(
+        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
+        r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
+    # how many characters should be displayed around the current link
+    context = 30
+    curpos = 0
+    # This loop will run until we have finished the current page
+    while True:
+        m = linkR.search(text, pos=curpos)
+        if not m:
+            break
+        # Make sure that next time around we will not find this same hit.
+        curpos = m.start() + 1
+        page_title = m.group('title')
+        # ignore interwiki links and links to sections of the same page
+        if page_title == '' or mysite.isInterwikiLink(page_title):
+            continue
+        actualLinkPage = pywikibot.Page(page.site(), page_title)
+        # Check whether the link found is to linkedPage.
+        if actualLinkPage != linkedPage:
+            continue
+
+        # at the beginning of the link, start red color.
+        # at the end of the link, reset the color to default
+        pywikibot.output(text[max(0, m.start() - context):m.start()] +
+                         '\03{lightred}' + text[m.start():m.end()] +
+                         '\03{default}' + text[m.end():m.end() + context])
+        while True:
+            choice = pywikibot.input(
+                u"Option (N=do not change, y=change link to "
+                u"\03{lightpurple}%s\03{default}, r=change and replace text, "
+                u"u=unlink)" % targetPage.title())
+            # An empty (or missing) answer selects the default 'N'. Only
+            # that case is caught, so unrelated errors are no longer hidden
+            # by a bare except.
+            try:
+                choice = choice[0]
+            except (IndexError, TypeError):
+                choice = 'N'
+            if choice in 'nNyYrRuU':
+                break
+        if choice in "nN":
+            continue
+
+        # The link looks like this:
+        # [[page_title|link_text]]trailing_chars
+        link_text = m.group('label')
+        if not link_text:
+            # or like this: [[page_title]]trailing_chars
+            link_text = page_title
+        # The group is None when the link carries no #section part.
+        section = m.group('section')
+        if section is None:
+            section = ''
+        trailing_chars = m.group('linktrail')
+        if trailing_chars:
+            link_text += trailing_chars
+
+        if choice in "uU":
+            # unlink - we remove the section if there's any
+            text = text[:m.start()] + link_text + text[m.end():]
+            continue
+        replaceit = choice in "rR"
+
+        # Follow the capitalisation of the first letter used in the text.
+        target_title = targetPage.title()
+        if link_text[0].isupper():
+            new_page_title = target_title
+        else:
+            new_page_title = target_title[0].lower() + target_title[1:]
+        if replaceit and trailing_chars:
+            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
+        elif replaceit or (new_page_title == link_text and not section):
+            newlink = "[[%s]]" % new_page_title
+        # check if we can create a link with trailing characters instead of a
+        # pipelink
+        elif (len(new_page_title) <= len(link_text) and
+              firstcap(link_text[:len(new_page_title)]) ==
+              firstcap(new_page_title) and
+              re.sub(re.compile(linktrail), '',
+                     link_text[len(new_page_title):]) == '' and
+              not section):
+            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
+                                    link_text[len(new_page_title):])
+        else:
+            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
+        text = text[:m.start()] + newlink + text[m.end():]
+    return text
+
+def workon(page, links):
+ """Run treat() on ``page`` for every redirect among ``links``.
+
+ page: page whose text is read with page.get() and, if it was changed,
+ written back with page.put().
+ links: pages linked from ``page``; each one for which
+ getRedirectTarget() succeeds is passed to treat() with its target.
+ """
+ text = page.get()
+ # Show the title of the page we're working on.
+ # Highlight the title in purple.
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % page.title())
+ for page2 in links:
+ try:
+ target = page2.getRedirectTarget()
+ # Links for which no redirect target can be obtained are skipped.
+ except (pywikibot.Error,pywikibot.SectionError):
+ continue
+ text = treat(text, page2, target)
+ # NOTE(review): indentation was flattened in this mail, so it cannot be
+ # told from here whether the save below runs once after the loop or after
+ # every link - presumably once after the loop; confirm in the change.
+ # The edit summary is picked from the module-level ``msg`` table.
+ if text != page.get():
+ comment = pywikibot.translate(mysite, msg)
+ page.put(text, comment)
+
+def main():
+ """Iterate over the site's disambiguation category and call workon().
+
+ The arguments returned by pywikibot.handleArgs() are joined with spaces
+ and used as the ``start`` value of the category generator; without
+ arguments "!" is used. Assigns the module globals ``mysite``,
+ ``linktrail`` and ``page`` that treat() and workon() read.
+ """
+ global mysite, linktrail, page
+ start = []
+ for arg in pywikibot.handleArgs():
+ start.append(arg)
+ if start:
+ start = " ".join(start)
+ else:
+ start = "!"
+ mysite = pywikibot.getSite()
+ linktrail = mysite.linktrail()
+ try:
+ generator = pagegenerators.CategorizedPageGenerator(
+ mysite.disambcategory(), start = start)
+ # Tell the user why, then let the NoPage error propagate.
+ except pywikibot.NoPage:
+ pywikibot.output(
+ "The bot does not know the disambiguation category for your wiki.")
+ raise
+ # only work on articles
+ generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
+ generator = pagegenerators.PreloadingGenerator(generator)
+ # Pages waiting to be processed, each paired with its outgoing links.
+ pagestodo = []
+ # Linked pages collected so far; handed to pywikibot.getall() in batches.
+ pagestoload = []
+ for page in generator:
+ if page.isRedirectPage():
+ continue
+ linked = page.linkedPages()
+ pagestodo.append((page,linked))
+ pagestoload += linked
+ # NOTE(review): indentation was flattened in this mail. Presumably all of
+ # the lines below belong to this ``if``, in which case pages still queued
+ # when the generator is exhausted are never passed to workon() - confirm.
+ if len(pagestoload) > 49:
+ pywikibot.getall(mysite,pagestoload)
+ for page, links in pagestodo:
+ workon(page,links)
+ pagestoload = []
+ pagestodo = []
+
+# Script entry point: run main() only when executed directly.
+if __name__ == "__main__":
+ try:
+ main()
+ # pywikibot.stopme() is called whether or not main() raised.
+ finally:
+ pywikibot.stopme()
+
--
To view, visit https://gerrit.wikimedia.org/r/102912
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Vldandrew <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits