jenkins-bot has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/602984 )
Change subject: [maintenance] Add update_linktrails.py maintenance script
......................................................................
[maintenance] Add update_linktrails.py maintenance script
- Add update_linktrails.py maintenance script to update linktrail
dict in family.py
- add the script to docs
Change-Id: I5107e472b8959e146d1e6371efecf4bbb546c78e
---
M docs/scripts/scripts.maintenance.rst
M scripts/README.rst
A scripts/maintenance/update_linktrails.py
3 files changed, 176 insertions(+), 1 deletion(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/docs/scripts/scripts.maintenance.rst
b/docs/scripts/scripts.maintenance.rst
index 7a31635..f3dd955 100644
--- a/docs/scripts/scripts.maintenance.rst
+++ b/docs/scripts/scripts.maintenance.rst
@@ -27,9 +27,14 @@
.. automodule:: scripts.maintenance.make_i18n_dict
+scripts.maintenance.update\_linktrails script
+---------------------------------------------
+
+.. automodule:: scripts.maintenance.update_linktrails
+
+
scripts.maintenance.wikimedia\_sites script
-------------------------------------------
.. automodule:: scripts.maintenance.wikimedia_sites
-
diff --git a/scripts/README.rst b/scripts/README.rst
index 5b009e5..de5014e 100644
--- a/scripts/README.rst
+++ b/scripts/README.rst
@@ -280,6 +280,8 @@
+------------------------+---------------------------------------------------------+
| make_i18n_dict.py | Generate an i18n file from a given script.
|
+------------------------+---------------------------------------------------------+
+ | update_linktrails.py | Script that updates the linktrails in family.py
file. |
+
+------------------------+---------------------------------------------------------+
| wikimedia_sites.py | Updates the language lists in Wikimedia family
files. |
+------------------------+---------------------------------------------------------+
diff --git a/scripts/maintenance/update_linktrails.py
b/scripts/maintenance/update_linktrails.py
new file mode 100644
index 0000000..bc50819
--- /dev/null
+++ b/scripts/maintenance/update_linktrails.py
@@ -0,0 +1,168 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Script that updates the linktrails in family.py file.
+
+linktrails contains a regex for each site code which holds letters that
+can follow a wikilink and are regarded as part of this link. This depends
+on the linktrail setting in LanguageXx.php. This maintenance script
+retrieves the site settings from wikipedia family and updates the Family
+linktrails dict.
+"""
+#
+# (C) Pywikibot team, 2017-2020
+#
+# Distributed under the terms of the MIT license.
+#
+
+import codecs
+import re
+
+from contextlib import closing
+from os.path import join
+
+import pywikibot
+
+from pywikibot.family import CODE_CHARACTERS
+from pywikibot.tools import suppress_warnings
+
+
def format_string(code: str, pattern: str) -> str:
    """Format a single ``'code': 'pattern',`` entry of the linktrails dict.

    The entry is indented by eight spaces and ends with a comma and a
    newline. A pattern which is too long to share a line with its site
    code is split in the middle into two adjacent string literals; the
    second literal is aligned below the first one.

    The literals are written with ``repr()``, so a pattern holding a
    single quote or a backslash still gives valid Python source which
    evaluates to exactly the given pattern. Patterns without such
    characters are formatted with plain single quotes.

    :param code: site code used as dict key
    :param pattern: linktrail regex used as dict value
    :return: the formatted source line(s), each terminated by a newline
    """
    indent = ' ' * 8
    # 63 is the length budget shared by code and pattern; presumably it
    # is derived from the 79 column limit -- taken over unchanged
    if len(pattern) > 63 - len(code):
        index = len(pattern) // 2
        # 12 = 8 (indent) + 2 (quotes around code) + 2 (colon and space)
        continuation = ' ' * (len(code) + 12)
        return '{}{!r}: {!r}\n{}{!r},\n'.format(
            indent, code, pattern[:index], continuation, pattern[index:])
    return '{}{!r}: {!r},\n'.format(indent, code, pattern)
+
+
def coroutine(func):
    """Decorator which starts a coroutine.

    Calling the decorated generator function creates the generator and
    advances it to its first ``yield``, so that values can be passed to
    it with ``send()`` right away.

    :param func: the generator function to wrap
    :return: a function returning the already started generator
    """
    # imported locally to keep this block independent of the module's
    # import section
    from functools import wraps

    @wraps(func)  # keep name and docstring of the wrapped function
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        next(cr)  # run up to the first yield
        return cr
    return start
+
+
@coroutine
def update_sites(fam):
    """Process linktrail for each site code sent to this coroutine.

    For every code received a wikipedia site is created. A code is
    skipped if its site is a ``RemovedSite`` or if the site created
    reports a different code. Otherwise the ``linktrail`` entry of the
    site's general siteinfo is passed on to the line formatter, together
    with the code and the linktrail currently held by *fam*.

    When the coroutine is closed, the formatter is closed explicitly as
    well, so the following processing steps are finished at a defined
    point instead of when the generator objects happen to be garbage
    collected.

    :param fam: family whose ``linktrails`` dict gives the old values
    """
    formatter = update_line()
    try:
        while True:
            code = yield

            # do not report the warning about a site instantiated using
            # a different code; such codes are handled below
            with suppress_warnings(
                    'Site wikipedia:[{}]+ instantiated using different code'
                    .format(CODE_CHARACTERS),
                    category=UserWarning,
                    filename=r'.+pywikibot.tools.__init__\.py'):
                site = pywikibot.Site(code, 'wikipedia')

            if isinstance(site, pywikibot.site.RemovedSite):
                continue

            if site.code != code:
                pywikibot.output('"{}" is redirected to "{}"; skipping.'
                                 .format(code, site.code))
                continue

            general = site.siteinfo.get('general', expiry=True)
            linktrail = general['linktrail']
            oldtrail = fam.linktrails.get(code)
            formatter.send((code, oldtrail, linktrail))
    except GeneratorExit:
        # orderly shutdown: finish the downstream coroutines now
        formatter.close()
        raise
+
+
@coroutine
def update_line():
    """Format linktrail for family file.

    Receives ``(code, old, linktrail)`` tuples. Depending on the
    linktrail retrieved from the site one of the following is done:

    - no linktrail: the old entry (if any) is written unchanged
    - the empty linktrail ``/^()(.*)$/sD``: an empty pattern is written
    - no character class found: a message is printed and the old entry
      (if any) is written unchanged
    - otherwise the regex match is handed to update_matched_line

    When the coroutine is closed, the matcher and the family file writer
    are closed explicitly, so the family file is written at that point
    and does not depend on garbage collection of the generators.
    """
    writer = update_family_file()
    matcher = update_matched_line(writer)
    try:
        while True:
            code, old, linktrail = yield
            # fallback line which keeps the current family file entry
            line = format_string(code, old) if old else ''

            if not linktrail:
                writer.send(line)
                continue

            if linktrail == '/^()(.*)$/sD':  # empty linktrail
                line = format_string(code, '')
                writer.send(line)
                continue

            # find the character class and optional '|x' alternatives
            # of the group repeated with '+'
            match = re.search(
                r'\((?:\:\?|\?\:)?\[(?P<pattern>.+?)\]'
                r'(?P<letters>(\|.)*)\)?\+\)',
                linktrail)

            if not match:
                pywikibot.output('"{}": No pattern found in "{}"'
                                 .format(code, linktrail))
                writer.send(line)
                continue

            matcher.send((code, old, match))
    except GeneratorExit:
        # the matcher feeds the writer, so it is closed first
        matcher.close()
        writer.close()
        raise
+
+
@coroutine
def update_matched_line(writer):
    """Update matched linktrail.

    Receives ``(code, old, match)`` tuples where *match* has the named
    groups ``pattern`` (content of the character class) and ``letters``
    (optional ``|x`` alternatives following the class). A formatted
    ``[...]*`` entry is sent to *writer*, except for the default
    linktrail ``a-z`` without additional letters, for which nothing is
    written.

    :param writer: started coroutine receiving the formatted lines
    """
    while True:
        code, old, match = yield
        pattern = match.group('pattern')
        letters = match.group('letters')
        if pattern == 'a-z' and not letters:  # default
            if old:
                pywikibot.output('"{}" has default linktrail; '
                                 'removing {}'.format(code, old))
            continue

        if r'x{' in pattern:
            # replace unicode escape string by corresponding char; hex
            # digits of either case and of any count are accepted
            pattern = re.sub(
                r'\\x\{([0-9A-Fa-f]+)\}',
                lambda escape: chr(int(escape.group(1), 16)),
                pattern)

        if letters:
            # '|x|y' alternatives become plain members of the class
            pattern += letters.replace('|', '')

        writer.send(format_string(code, '[{}]*'.format(pattern)))
+
+
@coroutine
def update_family_file():
    """Collect linktrails and write them to family.py.

    Every string sent to this coroutine is appended to the source text
    of a new ``linktrails`` dict. When the coroutine is closed, the dict
    is completed and replaces the first ``linktrails`` block found in
    ``pywikibot/family.py``; the path is relative to the current working
    directory.
    """
    # header and closing brace use the 4 space indentation the search
    # regex below looks for; the entries sent in are indented by 8
    text = "    linktrails = {\n        '_default': '[a-z]*',\n"
    try:
        while True:
            text += yield
    except GeneratorExit:
        text += '    }'
        # write linktrails to family file
        pywikibot.output('Writing family file...')
        family_file_name = join('pywikibot', 'family.py')
        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
            family_text = family_file.read()
        # A callable is used as replacement to insert the text
        # literally; a string would be processed as template and
        # backslashes inside the patterns would be read as escapes.
        # NOTE(review): the non-greedy match ends at the first '}', so
        # it assumes no '}' inside the existing dict -- confirm.
        family_text = re.sub(r'(?msu)^ {4}linktrails.+?\}',
                             lambda _: text, family_text, count=1)
        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
            family_file.write(family_text)
+
+
def update_linktrails(family):
    """Feed all site codes of the given family into the update pipeline.

    The codes found in ``family.langs`` are sent in sorted order to the
    update_sites coroutine, which is closed after the last code or when
    an error occurs.
    """
    with closing(update_sites(family)) as pipeline:
        feed = pipeline.send
        for site_code in sorted(family.langs):
            feed(site_code)
+
+
if __name__ == '__main__':
    # the English Wikipedia site is only needed to get hold of its
    # family object, whose linktrails are then updated
    site = pywikibot.Site('en', 'wikipedia')
    wikipedia_family = site.family
    update_linktrails(wikipedia_family)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/602984
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I5107e472b8959e146d1e6371efecf4bbb546c78e
Gerrit-Change-Number: 602984
Gerrit-PatchSet: 21
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Dvorapa <[email protected]>
Gerrit-Reviewer: Framawiki <[email protected]>
Gerrit-Reviewer: Huji <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: Zhuyifei1999 <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: D3r1ck01 <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits