Prianka has uploaded a new change for review.
https://gerrit.wikimedia.org/r/185671
Change subject: Porting ndashredir.py from compat to core/scripts
......................................................................
Porting ndashredir.py from compat to core/scripts
Added ndashredir.py in core/scripts/ as part of Pywikibot:Compat
to Core Migration.
Bug: T66875
Change-Id: Iee31a8d7a1c8c6f7c2b60df8d3d43aef19d8a84d
---
A scripts/ndashredir.py
1 file changed, 168 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/71/185671/1
diff --git a/scripts/ndashredir.py b/scripts/ndashredir.py
new file mode 100644
index 0000000..5ef74ae
--- /dev/null
+++ b/scripts/ndashredir.py
@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+u"""
+This script will collect articles that have an n dash or m dash in the title.
+
+After collecting, it will create a redirect to them automatically from the
+corresponding hyphenated title. If that title already exists, it is skipped.
+It may take several hours. You may quit by Ctrl C at any time and continue
+later. Type the first few characters of the last shown title after -start.
+
+The script is primarily designed for work in article namespace, but can be used
+in any other one. Use in accordance with the rules of your community.
+
+Known parameters:
+
+-start Will start from the given title (it does not have to exist).
+ Parameter may be given as "-start" or "-start:title"
+ Defaults to '!'.
+
+-namespace Works in the given namespace (only one at a time). Parameter
+
+-ns may be given as "-ns:<number>" or "-namespace:<number>".
+ Defaults to 0 (main namespace).
+
+-nosub Will not process subpages. Useful in template or portal
+ namespace. (Not recommended for main namespace that has no
+ real subpages.)
+
+-save Saves the title of existing hyphenated articles whose content
+ is _other_ than a redirect to the corresponding article with
+ n dash or m dash in the title and thus may need manual
+ treatment. If omitted, these titles will be written only to
+ the screen (or the log if logging is on). The file is in a
+ form that you may upload to a wiki page.
+ May be given as "-save:<filename>". If it exists, titles
+ will be appended.
+ After checking these titles, you may want to write them to
+ your ignore file (see below).
+
+-ignore A file that contains titles that are not to be claimed to
+ redirect somewhere else. For example, if X-1 (with hyphen)
+ redirects to a disambiguation page that lists X–1 (with n
+ dash), that's OK and you don't want it to appear at each run
+ as a problematic article.
+ File must be encoded in UTF-8 and contain titles among double
+ square brackets (e.g. *[[X-1]] or [[:File:X-1.gif]]).
+ May be given as "-ignore:<filename>".
+"""
+
+#
+# (c) Bináris, 2012
+# (c) pywikibot team, 2012-2015
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import codecs
+import re
+import pywikibot
+from pywikibot import pagegenerators, i18n
+
+
+def main(*args):
+ regex = ur'.*[–—]' # Title contains an n dash (Alt 0150) or an m dash (Alt 0151).
+ ns = 0
+ start = '!'
+ filename = None # The name of the file to save problematic titles to (-save)
+ titlefile = None # The file object opened for that name
+ ignorefilename = None # The name of the ignore file (-ignore)
+ ignorelist = [] # Titles read from the ignore file; not reported as problematic
+
+ # Handling parameters (a bare option asks for its value interactively):
+ for arg in pywikibot.handle_args(*args):
+ if arg == '-start':
+ start = pywikibot.input(
+ u'From which title do you want to continue?')
+ elif arg.startswith('-start:'):
+ start = arg[7:]
+ elif arg in ['-ns', '-namespace']:
+ ns = pywikibot.input(u'Which namespace should we process?')
+ elif arg.startswith('-ns:') or arg.startswith('-namespace:'):
+ ns = arg[arg.find(':') + 1:] # NOTE(review): ns stays a string here
+ elif arg == '-nosub':
+ regex = ur'[^/]*[–—][^/]*$'
+ elif arg == '-save':
+ filename = pywikibot.input('Please enter the filename:')
+ elif arg.startswith('-save:'):
+ filename = arg[6:]
+ elif arg == '-ignore':
+ ignorefilename = pywikibot.input('Please enter the filename:')
+ elif arg.startswith('-ignore:'):
+ ignorefilename = arg[8:]
+
+ # File operations:
+ if filename:
+ try:
+ # This opens in strict error mode, that means bot will stop
+ # on encoding errors with ValueError.
+ # See http://docs.python.org/library/codecs.html#codecs.open
+ titlefile = codecs.open(filename, encoding='utf-8', mode='a')
+ except (OSError, IOError):
+ pywikibot.output("%s cannot be opened." % filename)
+ return
+ if ignorefilename:
+ try:
+ with codecs.open(ignorefilename, encoding='utf-8', mode='r') as
igfile:
+ ignorelist = re.findall(ur'\[\[:?(.*?)\]\]', igfile.read())
+ except (OSError, IOError):
+ pywikibot.output("%s cannot be opened." % ignorefilename)
+ return
+
+ # Ready to initialize
+ site = pywikibot.Site()
+ redirword = site.redirect()
+ gen = pagegenerators.RegexFilterPageGenerator(site.allpages(
+ start=start, namespace=ns, filterredir=False), [regex])
+
+ # Processing: one hyphenated redirect candidate per page yielded by gen
+ for page in gen:
+ title = page.title()
+ editSummary = i18n.twtranslate(site, 'ndashredir-create',
+ {'title': title})
+ newtitle = title.replace(u'–', '-').replace(u'—', '-')
+ # n dash -> hyphen, m dash -> hyphen, respectively
+ redirpage = pywikibot.Page(site, newtitle)
+ if redirpage.exists():
+ if (redirpage.isRedirectPage() and
+ redirpage.getRedirectTarget() == page):
+ pywikibot.output(u'[[%s]] already redirects to [[%s]], nothing'
+ u' to do with it.' % (newtitle, title))
+ elif newtitle in ignorelist:
+ pywikibot.output(
+ u'Skipping [[%s]] because it is on your ignore list.'
+ % newtitle)
+ else:
+ pywikibot.output(
+ u'\03{lightyellow}Skipping [[%s]] because it exists '
+ u'already with a different content.\03{default}'
+ % newtitle)
+ if titlefile:
+ s = u'\n#%s does not redirect to %s.' % (
+ redirpage.title(asLink=True, textlink=True),
+ page.title(asLink=True, textlink=True))
+ # For the unlikely case that someone wants to run it in the
+ # file namespace (presumably why textlink=True is passed).
+ titlefile.write(s)
+ titlefile.flush()
+ else:
+ text = u'#%s[[%s]]' % (redirword, title)
+ try:
+ redirpage.put(text, editSummary)
+ except pywikibot.LockedPage:
+ pywikibot.output(
+ (u'\03{lightyellow}Skipping [[%s]] because it is '
+ u'protected.\03{default}') % newtitle)
+ except: # NOTE(review): bare except hides the real error; narrow it
+ pywikibot.output(
+ (u'\03{lightyellow}Skipping [[%s]] because of an error.'
+ u'\03{default}') % newtitle)
+ # Todo: output the title upon Ctrl C? (KeyboardInterrupt always hits
+ # RegexFilterPageGenerator or throttle.py or anything else and cannot
+ # be caught in this loop.)
+ if titlefile:
+ titlefile.close() # Close for tidiness (each write was already flushed)
+
+if __name__ == "__main__":
+ main()
--
To view, visit https://gerrit.wikimedia.org/r/185671
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iee31a8d7a1c8c6f7c2b60df8d3d43aef19d8a84d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Prianka <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits