jenkins-bot has submitted this change and it was merged.

Change subject: [IMPROV] Redirect: Use dynamic interwiki prefix
......................................................................


[IMPROV] Redirect: Use dynamic interwiki prefix

Instead of guessing that the interwiki prefix is the same as a language
code in the family, parse the redirect target using the Link class. This
also separates the link label and section from the title and
automatically chooses the correct capitalization.

Change-Id: I36e5c0e9d3ca0af6813a70851159445bc3285253
---
M scripts/redirect.py
1 file changed, 27 insertions(+), 30 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/redirect.py b/scripts/redirect.py
index 225d231..c29fa88 100755
--- a/scripts/redirect.py
+++ b/scripts/redirect.py
@@ -82,6 +82,12 @@
 from pywikibot import i18n, xmlreader, Bot
 
 
+def space_to_underscore(link):
+    """Convert spaces to underscore."""
+    # previous versions weren't expecting spaces but underscores
+    return link.canonical_title().replace(' ', '_')
+
+
 class RedirectGenerator:
 
     """Redirect generator."""
@@ -130,45 +136,36 @@
                         not in self.namespaces:
                     continue
             if alsoGetPageTitles:
-                pageTitles.add(entry.title.replace(' ', '_'))
+                pageTitles.add(space_to_underscore(pywikibot.Link(entry.title, self.site)))
 
             m = redirR.match(entry.text)
             if m:
                 target = m.group(1)
                 # There might be redirects to another wiki. Ignore these.
-                for code in self.site.family.langs.keys():
-                    if target.startswith('%s:' % code) \
-                            or target.startswith(':%s:' % code):
-                        if code == self.site.language():
-                            # link to our wiki, but with the lang prefix
-                            target = target[(len(code) + 1):]
-                            if target.startswith(':'):
-                                target = target[1:]
-                        else:
-                            pywikibot.output(
-                                u'NOTE: Ignoring %s which is a redirect to %s:'
-                                % (entry.title, code))
-                            target = None
-                            break
+                target_link = pywikibot.Link(target, self.site)
+                try:
+                    target_link.parse()
+                except pywikibot.SiteDefinitionError as e:
+                    pywikibot.log(e)
+                    pywikibot.output(
+                        u'NOTE: Ignoring {0} which is a redirect ({1}) to an '
+                        u'unknown site.'.format(entry.title, target))
+                    target_link = None
+                else:
+                    if target_link.site != self.site:
+                        pywikibot.output(
+                            u'NOTE: Ignoring {0} which is a redirect to '
+                            u'another site {1}.'.format(entry.title, target_link.site))
+                        target_link = None
                 # if the redirect does not link to another wiki
-                if target:
-                    source = entry.title.replace(' ', '_')
-                    target = target.replace(' ', '_')
-                    # remove leading and trailing whitespace
-                    target = target.strip('_')
-                    # capitalize the first letter
-                    if not pywikibot.Site().nocapitalize:
-                        source = source[:1].upper() + source[1:]
-                        target = target[:1].upper() + target[1:]
-                    if '#' in target:
-                        target = target[:target.index('#')].rstrip("_")
-                    if '|' in target:
+                if target_link and target_link.title:
+                    source = pywikibot.Link(entry.title, self.site)
+                    if target_link.anchor:
                         pywikibot.output(
                             u'HINT: %s is a redirect with a pipelink.'
                             % entry.title)
-                        target = target[:target.index('|')].rstrip("_")
-                    if target:  # in case preceding steps left nothing
-                        redict[source] = target
+                    redict[space_to_underscore(source)] = (
+                        space_to_underscore(target_link))
         if alsoGetPageTitles:
             return redict, pageTitles
         else:

-- 
To view, visit https://gerrit.wikimedia.org/r/190658
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I36e5c0e9d3ca0af6813a70851159445bc3285253
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to