jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/602984 )

Change subject: [maintenance] Add update_linktrails.py maintenance script
......................................................................

[maintenance] Add update_linktrails.py maintenance script

- Add update_linktrails.py maintenance script to update linktrail
  dict in family.py
- add the script to docs

Change-Id: I5107e472b8959e146d1e6371efecf4bbb546c78e
---
M docs/scripts/scripts.maintenance.rst
M scripts/README.rst
A scripts/maintenance/update_linktrails.py
3 files changed, 176 insertions(+), 1 deletion(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/docs/scripts/scripts.maintenance.rst b/docs/scripts/scripts.maintenance.rst
index 7a31635..f3dd955 100644
--- a/docs/scripts/scripts.maintenance.rst
+++ b/docs/scripts/scripts.maintenance.rst
@@ -27,9 +27,14 @@

 .. automodule:: scripts.maintenance.make_i18n_dict

+scripts.maintenance.update\_linktrails script
+---------------------------------------------
+
+.. automodule:: scripts.maintenance.update_linktrails
+
+
 scripts.maintenance.wikimedia\_sites script
 -------------------------------------------

 .. automodule:: scripts.maintenance.wikimedia_sites
 
-
diff --git a/scripts/README.rst b/scripts/README.rst
index 5b009e5..de5014e 100644
--- a/scripts/README.rst
+++ b/scripts/README.rst
@@ -280,6 +280,8 @@
     +------------------------+---------------------------------------------------------+
     | make_i18n_dict.py      | Generate a i18n file from a given script.               |
     +------------------------+---------------------------------------------------------+
+    | update_linktrails.py   | Script that updates the linktrails in family.py file.   |
+    +------------------------+---------------------------------------------------------+
     | wikimedia_sites.py     | Updates the language lists in Wikimedia family files.   |
     +------------------------+---------------------------------------------------------+

diff --git a/scripts/maintenance/update_linktrails.py b/scripts/maintenance/update_linktrails.py
new file mode 100644
index 0000000..bc50819
--- /dev/null
+++ b/scripts/maintenance/update_linktrails.py
@@ -0,0 +1,168 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Script that updates the linktrails in family.py file.
+
+linktrails contains a regex for each site code which holds letters that
+can follow a wikilink and are regarded as part of this link. This depends
+on the linktrail setting in LanguageXx.php. This maintenance script
+retrieves the site settings from wikipedia family and updates the Family
+linktrails dict.
+"""
+#
+# (C) Pywikibot team, 2017-2020
+#
+# Distributed under the terms of the MIT license.
+#
+
+import codecs
+import re
+
+from contextlib import closing
+from os.path import join
+
+import pywikibot
+
+from pywikibot.family import CODE_CHARACTERS
+from pywikibot.tools import suppress_warnings
+
+
+def format_string(code: str, pattern: str) -> str:
+    """Format a single pattern line.
+
+    The result is the source text of one ``'code': 'pattern',`` dict
+    entry, indented by eight spaces and terminated by a newline.
+
+    A pattern longer than ``63 - len(code)`` characters is cut at its
+    midpoint and written as two adjacent string literals on two lines.
+    The second literal is indented by ``len(code) + 12`` spaces, which
+    places its opening quote directly below that of the first literal.
+
+    :param code: key of the dict entry
+    :param pattern: value of the dict entry
+    :return: the formatted entry; every line of it ends with a newline
+    """
+    # NOTE(review): both values are inserted verbatim between single
+    # quotes; a quote or backslash inside them is not escaped here.
+    fmt = ' ' * 8 + "'{}': '{}'"
+    code_len = len(code)
+    pattern_len = len(pattern)
+    # 8 (indent) + 7 (quotes, colon, space, comma) + both lengths gives
+    # the line width; presumably the limit keeps it below 79 characters
+    if pattern_len > 63 - code_len:
+        # The first half gets no trailing comma, so the two literals
+        # form one value when the generated text is read as Python.
+        # NOTE(review): the cut position ignores the pattern content.
+        index = pattern_len // 2
+        result = fmt.format(code, pattern[:index]) + '\n'
+        result += ' ' * (code_len + 12) + "'{}',\n".format(pattern[index:])
+    else:
+        result = fmt.format(code, pattern) + ',\n'
+    return result
+
+
+def coroutine(func):
+    """Decorator which starts coroutine.
+
+    The decorated generator function is called with the given arguments
+    and the new generator is advanced once with ``send(None)``, so it has
+    already reached its first ``yield`` when the caller receives it and
+    can take values through ``send()`` right away.
+
+    :param func: generator function to be wrapped
+    :return: function returning the already started generator
+    """
+    def start(*args, **kwargs):
+        cr = func(*args, **kwargs)
+        # prime the generator: run it up to the first yield
+        cr.send(None)
+        return cr
+    return start
+
+
+@coroutine
+def update_sites(fam):
+    """Process linktrail for a given site code.
+
+    Coroutine which receives site codes through ``send()``. For each
+    code the wikipedia site is created, its current ``linktrail`` value
+    is read from the site's general siteinfo and passed on, together
+    with the code and the entry currently stored in ``fam.linktrails``,
+    to the :func:`update_line` coroutine.
+
+    Codes resolving to a ``RemovedSite`` are skipped silently; codes for
+    which the created site reports a different code are reported and
+    skipped.
+
+    :param fam: object whose ``linktrails`` mapping holds the old values
+    """
+    # next stage of the pipeline; created once and fed for every code
+    formatter = update_line()
+    while True:
+        # wait for the next site code sent by the caller
+        code = yield
+
+        # presumably silences the UserWarning raised when a code is
+        # mapped to another site; verify against suppress_warnings
+        with suppress_warnings(
+            'Site wikipedia:[{}]+ instantiated using different code'
+            .format(CODE_CHARACTERS),
+            category=UserWarning,
+                filename=r'.+pywikibot.tools.__init__\.py'):
+            site = pywikibot.Site(code, 'wikipedia')
+
+        if isinstance(site, pywikibot.site.RemovedSite):
+            continue
+
+        # the site was created for another code than the requested one
+        if site.code != code:
+            pywikibot.output('"{}" is redirected to "{}"; skipping.'
+                             .format(code, site.code))
+            continue
+
+        # NOTE(review): expiry=True presumably bypasses cached siteinfo
+        # -- confirm against the siteinfo API
+        linktrail = site.siteinfo.get('general', expiry=True)['linktrail']
+        # None if the family has no entry for this code yet
+        oldtrail = fam.linktrails.get(code)
+        formatter.send((code, oldtrail, linktrail))
+
+
+@coroutine
+def update_line():
+    """Format linktrail for family file.
+
+    Coroutine which receives ``(code, old, linktrail)`` tuples through
+    ``send()`` and decides what is written for that code:
+
+    - no linktrail value: the old entry is kept (nothing if there was
+      none)
+    - the value ``/^()(.*)$/sD``: an entry with an empty pattern is
+      written
+    - a value in which no character class can be found: this is
+      reported and the old entry is kept
+    - anything else: the regex match is handed to
+      :func:`update_matched_line` which creates the new entry
+    """
+    # the writer is shared with the matcher so that all entries end up
+    # in the same text, in the order they are processed
+    writer = update_family_file()
+    matcher = update_matched_line(writer)
+    while True:
+        code, old, linktrail = yield
+        # fallback entry built from the currently stored value
+        line = format_string(code, old) if old else ''
+
+        if not linktrail:
+            writer.send(line)
+            continue
+
+        if linktrail == '/^()(.*)$/sD':  # empty linktrail
+            line = format_string(code, '')
+            writer.send(line)
+            continue
+
+        # Look for "(" with an optional ":?" or "?:", followed by a
+        # character class "[...]" (group "pattern"), any number of
+        # "|x" single character alternatives (group "letters"), an
+        # optional ")" and finally "+)".
+        match = re.search(
+            r'\((?:\:\?|\?\:)?\[(?P<pattern>.+?)\]'
+            r'(?P<letters>(\|.)*)\)?\+\)',
+            linktrail)
+
+        if not match:
+            pywikibot.output('"{}": No pattern found in "{}"'
+                             .format(code, linktrail))
+            writer.send(line)
+            continue
+
+        matcher.send((code, old, match))
+
+
+@coroutine
+def update_matched_line(writer):
+    """Update matched linktrail.
+
+    Coroutine which receives ``(code, old, match)`` tuples through
+    ``send()``, where ``match`` is a regex match providing the groups
+    ``pattern`` and ``letters``. The new linktrail ``[<pattern>]*`` is
+    built from them, formatted with :func:`format_string` and sent to
+    *writer*.
+
+    If the pattern is just ``a-z`` without additional letters, nothing
+    is sent to *writer*, i.e. no entry is written for that code.
+
+    :param writer: started coroutine receiving the formatted lines
+    """
+    while True:
+        code, old, match = yield
+        pattern = match.group('pattern')
+        letters = match.group('letters')
+        if pattern == 'a-z' and not letters:  # default
+            # nothing is sent to the writer, so an existing entry is
+            # dropped from the generated dict
+            if old:
+                pywikibot.output('"{}" has default linktrail; '
+                                 'removing {}'.format(code, old))
+            continue
+
+        if r'x{' in pattern:
+            # replace unicode escape string by corresponding char
+            # Only "\x{HHHH}" with exactly four upper case hex digits
+            # is converted. The lambda parameter shadows the outer
+            # "match", which is not used again in this iteration.
+            pattern = re.sub(
+                r'\\x\{([A-F0-9]{4})\}',
+                lambda match: chr(int(match.group(1), 16)),
+                pattern)
+
+        if letters:
+            # "|x|y" becomes "xy" and is appended to the class content
+            pattern += ''.join(letters.split('|'))
+
+        new = '[{}]*'.format(pattern)
+        line = format_string(code, new)
+        writer.send(line)
+
+
+@coroutine
+def update_family_file():
+    """Collect linktrails and write them to family.py.
+
+    Every string sent to this coroutine is appended to the text of a new
+    ``linktrails`` dict, which always starts with the ``'_default'``
+    entry. When the coroutine is closed, the dict is terminated and
+    substituted for the first ``linktrails`` block found in
+    ``pywikibot/family.py``; that file is then rewritten in place.
+
+    The file name is relative, so the current working directory decides
+    which file is read and written.
+    """
+    # NOTE(review): no code in this module calls close() on this
+    # generator explicitly; the write below seems to depend on the
+    # generator being finalized once its owners are discarded -- confirm.
+    text = "    linktrails = {\n        '_default': '[a-z]*',\n"
+    try:
+        while True:
+            text += yield
+    except GeneratorExit:
+        # raised at the yield above when the generator is closed
+        text += '    }'
+        # write linktrails to family file
+        pywikibot.output('Writing family file...')
+        family_file_name = join('pywikibot', 'family.py')
+        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
+            family_text = family_file.read()
+        # Replace the first block which starts with a line beginning with
+        # four spaces and "linktrails" and ends at the next "}".
+        # NOTE(review): "text" is used as replacement template here, so
+        # backslashes in it are interpreted by re.sub.
+        # NOTE(review): the count is passed positionally, which is
+        # deprecated as of Python 3.13; count=1 is the keyword form.
+        family_text = re.sub(r'(?msu)^ {4}linktrails.+?\}',
+                             text, family_text, 1)
+        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
+            family_file.write(family_text)
+
+
+def update_linktrails(family):
+    """Update linktrails for given family.
+
+    All codes of ``family.langs`` are sent in sorted order to the
+    :func:`update_sites` coroutine, which is closed afterwards.
+
+    :param family: object providing ``langs``; it is also passed to
+        :func:`update_sites`, which reads its ``linktrails`` mapping
+    """
+    # closing() calls updater.close() when the block is left
+    with closing(update_sites(family)) as updater:
+        for code in sorted(family.langs):
+            updater.send(code)
+
+
+if __name__ == '__main__':
+    # the family object is taken from the English Wikipedia site
+    site = pywikibot.Site('en', 'wikipedia')
+    update_linktrails(site.family)

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/602984
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I5107e472b8959e146d1e6371efecf4bbb546c78e
Gerrit-Change-Number: 602984
Gerrit-PatchSet: 21
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Dvorapa <[email protected]>
Gerrit-Reviewer: Framawiki <[email protected]>
Gerrit-Reviewer: Huji <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: Zhuyifei1999 <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: D3r1ck01 <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to