jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/579877 )

Change subject: [IMPR] use linktrail via siteinfo
......................................................................

[IMPR] use linktrail via siteinfo

- create linktrail regex from siteinfo['general']['linktrail']
- use tiny cache to cache the result
- Previously Site.linktrail() was delegated to family.linktrail()
  by magic Site.__getattr__(). Deprecate this useless method now.
- Also remove obsolete linktrails dict
- Add TestLinktrails to site_tests.py
- Update DrySite class with a default linktrail
- Remove update_linktrails.py maintenance script
- update documentation

Change-Id: Ie12ddb65f2ed9a9d520b39a4c372d3ee5d9f6309
---
M .codecov.yml
M docs/scripts/maintenance.rst
M docs/scripts_ref/scripts.maintenance.rst
M pywikibot/family.py
M pywikibot/scripts/generate_family_file.py
M pywikibot/site/_apisite.py
M scripts/README.rst
D scripts/maintenance/update_linktrails.py
M tests/site_tests.py
M tests/utils.py
10 files changed, 117 insertions(+), 404 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/.codecov.yml b/.codecov.yml
index 2ba30a4..f1a70ae 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -37,7 +37,6 @@
     - scripts/maintenance/make_i18n_dict.py
     - scripts/maintenance/preload_sites.py
     - scripts/maintenance/sorting_order.py
-    - scripts/maintenance/update_linktrails.py
     - scripts/maintenance/wikimedia_sites.py
     - scripts/userscripts/
     - tests/pwb/
diff --git a/docs/scripts/maintenance.rst b/docs/scripts/maintenance.rst
index d096563..6f3ed7d 100644
--- a/docs/scripts/maintenance.rst
+++ b/docs/scripts/maintenance.rst
@@ -25,12 +25,6 @@
 .. automodule:: scripts.maintenance.sorting_order
    :no-members:

-update\_linktrails script description
--------------------------------------
-
-.. automodule:: scripts.maintenance.update_linktrails
-   :no-members:
-
 wikimedia\_sites script description
 -----------------------------------

diff --git a/docs/scripts_ref/scripts.maintenance.rst b/docs/scripts_ref/scripts.maintenance.rst
index e7bb9c4..c331693 100644
--- a/docs/scripts_ref/scripts.maintenance.rst
+++ b/docs/scripts_ref/scripts.maintenance.rst
@@ -32,11 +32,6 @@

 .. automodule:: scripts.maintenance.sorting_order

-scripts.maintenance.update\_linktrails script
----------------------------------------------
-
-.. automodule:: scripts.maintenance.update_linktrails
-
 scripts.maintenance.wikimedia\_sites script
 -------------------------------------------

diff --git a/pywikibot/family.py b/pywikibot/family.py
index 871a36c..4f80f39 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -21,7 +21,7 @@
 from pywikibot import config
 from pywikibot.backports import Dict, List, Set, Tuple  # skipcq: PY-W2000
 from pywikibot.exceptions import FamilyMaintenanceWarning, UnknownFamilyError
-from pywikibot.tools import classproperty, deprecated
+from pywikibot.tools import classproperty, deprecated, remove_last_args


 logger = logging.getLogger('pywiki.wiki.family')
@@ -29,7 +29,7 @@
 # Legal characters for Family.name and Family.langs keys
 NAME_CHARACTERS = string.ascii_letters + string.digits
 # nds_nl code alias requires "_"n
-# dash must be the last char to be reused as regex in update_linktrails
+# dash must be the last char to be reused as regex
 CODE_CHARACTERS = string.ascii_lowercase + string.digits + '_-'


@@ -157,211 +157,6 @@
     fyinterwiki.sort(key=lambda x:
                      x.replace('y', 'i') + x.count('y') * '!')

-    # Letters that can follow a wikilink and are regarded as part of
-    # this link. This depends on the linktrail setting in LanguageXx.php
-    #
-    # Do not use this dict directly but Site.linktrail or Family.linktrail
-    # methods instead
-    linktrails = {
-        '_default': '[a-z]*',
-        'ab': '[a-zабвгӷҕдежзӡикқҟлмнопԥҧрстҭуфхҳцҵчҷҽҿшыҩџьә]*',
-        'ady': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
-        'als': '[äöüßa-z]*',
-        'alt': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяјҥӧӱ]*',
-        'ami': '',
-        'an': '[a-záéíóúñ]*',
-        'ar': '[a-zء-يؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ۭ]*',
-        'ary': '[a-zء-يؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ۭ]*',
-        'arz': '[a-zء-يؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ۭ]*',
-        'ast': '[a-záéíóúñ]*',
-        'atj': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'av': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
-        'avk': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'awa': '[a-zऀ-ॣ०-꣠-ꣿ]*',
-        'ay': '[a-záéíóúñ]*',
-        'az': '[a-zçəğıöşü]*',
-        'azb': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
-        'ba': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяәөүғҡңҙҫһ“»]*',
-        'bar': '[äöüßa-z]*',
-        'bat-smg': '[a-ząčęėįšųūž]*',
-        'be': '[абвгґджзеёжзійклмнопрстуўфхцчшыьэюяćčłńśšŭźža-z]*',
-        'be-tarask': '[абвгґджзеёжзійклмнопрстуўфхцчшыьэюяćčłńśšŭźža-z]*',
-        'bg': '[a-zабвгдежзийклмнопрстуфхцчшщъыьэюя]*',
-        'bm': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'bn': '[ঀ-৿]*',
-        'bpy': '[ঀ-৿]*',
-        'br': "(?:[a-zA-ZàâçéèêîôûäëïöüùñÇÉÂÊÎÔÛÄËÏÖÜÀÈÙÑ]|[cC]['’]h|C['’]H)*",
-        'bs': '[a-zćčžšđž]*',
-        'bxr': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'ca': "(?:[a-zàèéíòóúç·ïü]|'(?!'))*",
-        'cbk-zam': '[a-záéíóúñ]*',
-        'ce': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
-        'ckb': '[ئابپتجچحخدرڕزژسشعغفڤقکگلڵمنوۆهھەیێ‌]*',
-        'co': '[a-zàéèíîìóòúù]*',
-        'crh': '[a-zâçğıñöşüа-яёʺʹ“»]*',
-        'cs': '[a-záčďéěíňóřšťúůýž]*',
-        'csb': '[a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]*',
-        'cu': '[a-zабвгдеєжѕзїіıићклмнопсстѹфхѡѿцчш'
-              'щъыьѣюѥѧѩѫѭѯѱѳѷѵґѓђёјйљњќуўџэ҄я“»]*',
-        'cv': '[a-zа-яĕçăӳ"»]*',
-        'cy': '[àáâèéêìíîïòóôûŵŷa-z]*',
-        'da': '[a-zæøå]*',
-        'dag': '[ɛɣŋɔʒƐƔŊƆƷa-z]*',
-        'de': '[äöüßa-z]*',
-        'din': '[äëɛɛ̈éɣïŋöɔɔ̈óa-z]*',
-        'dsb': '[äöüßa-z]*',
-        'el': '[a-zαβγδεζηθικλμνξοπρστυφχψωςΑΒΓΔΕΖΗΘ'
-              'ΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίόύώϊϋΐΰΆΈΉΊΌΎΏΪΫ]*',
-        'eml': '[a-zàéèíîìóòúù]*',
-        'es': '[a-záéíóúñ]*',
-        'et': '[äöõšüža-z]*',
-        'ext': '[a-záéíóúñ]*',
-        'fa': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
-        'ff': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'fi': '[a-zäö]*',
-        'fiu-vro': '[äöõšüža-z]*',
-        'fo': '[áðíóúýæøa-z]*',
-        'fr': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'frp': '[a-zàâçéèêîœôû·’æäåāăëēïīòöōùü‘]*',
-        'frr': '[a-zäöüßåāđē]*',
-        'fur': '[a-zàéèíîìóòúù]*',
-        'fy': '[a-zàáèéìíòóùúâêîôûäëïöü]*',
-        'gag': '[a-zÇĞçğİıÖöŞşÜüÂâÎîÛû]*',
-        'gan': '',
-        'gcr': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'gl': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
-        'glk': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
-        'gn': '[a-záéíóúñ]*',
-        'gu': '[઀-૿]*',
-        'guw': '[a-zàáǎèéěìíǐòóǒùúɛ̌ɔɖẹọ]*',
-        'he': '[a-zא-ת]*',
-        'hi': '[a-zऀ-ॣ०-꣠-ꣿ]*',
-        'hr': '[čšžćđßa-z]*',
-        'hsb': '[äöüßa-z]*',
-        'ht': '[a-zàèòÀÈÒ]*',
-        'hu': '[a-záéíóúöüőűÁÉÍÓÚÖÜŐŰ]*',
-        'hy': '[a-zաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև«»]*',
-        'hyw': '[a-zաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև«»]*',
-        'ii': '',
-        'inh': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
-        'is': '[áðéíóúýþæöa-z-–]*',
-        'it': '[a-zàéèíîìóòúù]*',
-        'ka': '[a-zაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ“»]*',
-        'kaa': "(?:[a-zıʼ’“»]|'(?!'))*",
-        'kab': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'kbd': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
-        'kbp': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'kk': '[a-zäçéğıïñöşüýʺʹа-яёәғіқңөұүһٴ'
-              'ابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ“»]*',
-        'kl': '[a-zæøå]*',
-        'koi': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'krc': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'ksh': '[äöüėëijßəğåůæœça-z]*',
-        'ku': '[a-zçêîşûẍḧÇÊÎŞÛẌḦ]*',
-        'kv': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'lad': '[a-záéíóúñ]*',
-        'lb': '[äöüßa-z]*',
-        'lbe': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ1“»]*',
-        'lez': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
-        'li': '[a-zäöüïëéèà]*',
-        'lij': '[a-zàéèíîìóòúù]*',
-        'lld': '[a-zàéèíîìóòúù]*',
-        'lmo': '[a-zàéèíîìóòúù]*',
-        'ln': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'lrc': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
-        'lt': '[a-ząčęėįšųūž]*',
-        'ltg': '[a-zA-ZĀāČčĒēĢģĪīĶķĻļŅņŠšŪūŽž]*',
-        'lv': '[a-zA-ZĀāČčĒēĢģĪīĶķĻļŅņŠšŪūŽž]*',
-        'mai': '[a-zऀ-ॣ०-꣠-ꣿ]*',
-        'mdf': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'mg': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'mhr': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'mk': '[a-zабвгдѓежзѕијклљмнњопрстќуфхцчџш]*',
-        'ml': '[a-zം-ൿ]*',
-        'mn': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя“»]*',
-        'mr': '[ऀ-ॣॱ-ॿ‍]*',
-        'mrj': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'mwl': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
-        'myv': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'mzn': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
-        'nah': '[a-záéíóúñ]*',
-        'nap': '[a-zàéèíîìóòúù]*',
-        'nds': '[äöüßa-z]*',
-        'nds-nl': '[a-zäöüïëéèà]*',
-        'nl': '[a-zäöüïëéèà]*',
-        'nn': '[æøåa-z]*',
-        'no': '[æøåa-z]*',
-        'nrm': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'oc': '[a-zàâçéèêîôû]*',
-        'olo': '[a-zčČšŠžŽäÄöÖ]*',
-        'or': '[a-z଀-୿]*',
-        'os': '[a-zаæбвгдеёжзийклмнопрстуфхцчшщъыьэюя“»]*',
-        'pa': '[ਁਂਃਅਆਇਈਉਊਏਐਓਔਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮ'
-              'ਯਰਲਲ਼ਵਸ਼ਸਹ਼ਾਿੀੁੂੇੈੋੌ੍ਖ਼ਗ਼ਜ਼ੜਫ਼ੰੱੲੳa-z]*',
-        'pcd': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'pdc': '[äöüßa-z]*',
-        'pfl': '[äöüßa-z]*',
-        'pl': '[a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]*',
-        'pms': '[a-zàéèíîìóòúù]*',
-        'pnt': '[a-zαβγδεζηθικλμνξοπρστυφχψωςΑΒΓΔΕΖΗΘ'
-               'ΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίόύώϊϋΐΰΆΈΉΊΌΎΏΪΫ]*',
-        'pt': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
-        'pwn': '',
-        'qu': '[a-záéíóúñ]*',
-        'rmy': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
-        'ro': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
-        'roa-rup': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
-        'roa-tara': '[a-zàéèíîìóòúù]*',
-        'ru': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'rue': '[a-zабвгґдеєжзиіїйклмнопрстуфхцчшщьєюяёъы“»]*',
-        'sa': '[a-zऀ-ॣ०-꣠-ꣿ]*',
-        'sah': '[a-zабвгҕдеёжзийклмнҥоөпрсһтуүфхцчшщъыьэюя]*',
-        'scn': '[a-zàéèíîìóòúù]*',
-        'se': '[a-zàáâçčʒǯđðéèêëǧǥȟíìîïıǩŋñóòôõßšŧúùûýÿüžþæøåäö]*',
-        'sg': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'sh': '[a-zčćđžš]*',
-        'shi': '[ⴰ-ⵯa-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙḍḥɛṛɣṣṭẓḌḤƐṚƔṢṬẒʷ]*',
-        'sk': '[a-záäčďéíľĺňóôŕšťúýž]*',
-        'skr': '[آابٻپتٹثجچڄحخدڈݙذرڑزژسشصضطظعغفقکگڳلمنݨوہھیےئأءۃڋڰںؤ]*',
-        'sl': '[a-zčćđžš]*',
-        'smn': '[a-zâčđŋšžäá]*',
-        'sr': '[abvgdđežzijklljmnnjoprstćufhcčdž'
-              'šабвгдђежзијклљмнњопрстћуфхцчџш]*',
-        'srn': '[a-zäöüïëéèà]*',
-        'stq': '[äöüßa-z]*',
-        'sv': '[a-zåäöéÅÄÖÉ]*',
-        'szl': '[a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]*',
-        'szy': '',
-        'ta': '[஀-௿]*',
-        'tay': '',
-        'te': '[ఁ-౯]*',
-        'tet': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
-        'tg': '[a-zабвгдеёжзийклмнопрстуфхчшъэюяғӣқўҳҷцщыь]*',
-        'tk': '[a-zÄäÇçĞğŇňÖöŞşÜüÝýŽž]*',
-        'tr': '[a-zÇĞçğİıÖöŞşÜüÂâÎîÛû]*',
-        'trv': '',
-        'tt': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӘәӨөҮүҖҗҢңҺһ]*',
-        'ty': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'tyv': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'udm': '[a-zа-яёӝӟӥӧӵ]*',
-        'uk': '[a-zабвгґдеєжзиіїйклмнопрстуфхцчшщьєюяёъы“»]*',
-        'ur': '[ابپتٹثجچحخدڈذر​ڑ​زژسشصضطظعغفقکگل​م​نںوؤہھیئےآأءۃ]*',
-        'uz': '[a-zʻʼ“»]*',
-        'vec': '[a-zàéèíîìóòúù]*',
-        'vep': '[äöõšüža-z]*',
-        'vi': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'vls': '[a-zäöüïëéèà]*',
-        'wa': '[a-zåâêîôûçéè]*',
-        'wo': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
-        'wuu': '',
-        'xal': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
-        'xmf': '[a-zაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ“»]*',
-        'yi': '[a-zא-ת]*',
-        'za': '',
-        'zea': '[a-zäöüïëéèà]*',
-        'zh': '',
-    }
-
     # A list of category redirect template names in different languages
     category_redirect_templates = {
         '_default': []
@@ -617,20 +412,17 @@
         Family._families[fam] = cls
         return cls

-    def linktrail(self, code, fallback: str = '_default'):
+    @deprecated('APISite.linktrail()', since='7.3.0')
+    @remove_last_args(['fallback'])
+    def linktrail(self, code: str) -> str:
         """Return regex for trailing chars displayed as part of a link.

         Returns a string, not a compiled regular expression object.
+
+        .. deprecated:: 7.3
         """
-        if code in self.linktrails:
-            return self.linktrails[code]
-
-        if fallback:
-            return self.linktrails[fallback]
-
-        raise KeyError(
-            'ERROR: linktrail in language {language_code} unknown'
-            .format(language_code=code))
+        site = pywikibot.Site(code, 'wikipedia')
+        return site.linktrail()

     def category_redirects(self, code, fallback: str = '_default'):
         """Return list of category redirect templates."""
diff --git a/pywikibot/scripts/generate_family_file.py b/pywikibot/scripts/generate_family_file.py
index 817409c..9b22ee3 100755
--- a/pywikibot/scripts/generate_family_file.py
+++ b/pywikibot/scripts/generate_family_file.py
@@ -43,7 +43,7 @@
 # Legal characters for Family name and Family langs keys
 NAME_CHARACTERS = string.ascii_letters + string.digits
 # nds_nl code alias requires "_"n
-# dash must be the last char to be reused as regex in update_linktrails
+# dash must be the last char to be reused as regex
 CODE_CHARACTERS = string.ascii_lowercase + string.digits + '_-'


diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py
index 98c2c81..1d6e174 100644
--- a/pywikibot/site/_apisite.py
+++ b/pywikibot/site/_apisite.py
@@ -69,6 +69,7 @@
 from pywikibot.site._tokenwallet import TokenWallet
 from pywikibot.site._upload import Uploader
 from pywikibot.tools import (
+    cached,
     MediaWikiVersion,
     deprecated,
     merge_unique_dicts,
@@ -671,6 +672,47 @@
         assert '$1' in path, 'articlepath must contain "$1" placeholder'
         return path.replace('$1', '{}')

+    @cached
+    def linktrail(self) -> str:
+        """Build linktrail regex from siteinfo linktrail.
+
+        Letters that can follow a wikilink and are regarded as part of
+        this link. This depends on the linktrail setting in LanguageXx.php
+
+        .. versionadded:: 7.3
+
+        :return: The linktrail regex.
+        """
+        unresolved_linktrails = {
+            'br': '(?:[a-zA-ZàâçéèêîôûäëïöüùñÇÉÂÊÎÔÛÄËÏÖÜÀÈÙÑ]'
+                  "|[cC]['’]h|C['’]H)*",
+            'ca': "(?:[a-zàèéíòóúç·ïü]|'(?!'))*",
+            'kaa': "(?:[a-zıʼ’“»]|'(?!'))*",
+        }
+        linktrail = self.siteinfo['general']['linktrail']
+        if linktrail == '/^()(.*)$/sD':  # empty linktrail
+            return ''
+
+        match = re.search(r'\((?:\:\?|\?\:)?\[(?P<pattern>.+?)\]'
+                          r'(?P<letters>(\|.)*)\)?\+\)', linktrail)
+        if not match:
+            with suppress(KeyError):
+                return unresolved_linktrails[self.code]
+            raise KeyError(
+                '"{}": No linktrail pattern extracted from "{}"'
+                .format(self.code, linktrail))
+
+        pattern = match.group('pattern')
+        letters = match.group('letters')
+
+        if r'x{' in pattern:
+            pattern = re.sub(r'\\x\{([A-F0-9]{4})\}',
+                             lambda match: chr(int(match.group(1), 16)),
+                             pattern)
+        if letters:
+            pattern += ''.join(letters.split('|'))
+        return '[{}]*'.format(pattern)
+
     @staticmethod
     def assert_valid_iter_params(
         msg_prefix: str,
diff --git a/scripts/README.rst b/scripts/README.rst
index 7282cc6..519b755 100644
--- a/scripts/README.rst
+++ b/scripts/README.rst
@@ -172,8 +172,6 @@
     +------------------------+---------------------------------------------------------+
     | sorting_order.py       | Updates interwiki sorting order in family.py file.      |
     +------------------------+---------------------------------------------------------+
-    | update_linktrails.py   | Script that updates the linktrails in family.py file.   |
-    +------------------------+---------------------------------------------------------+
     | wikimedia_sites.py     | Updates the language lists in Wikimedia family files.   |
     +------------------------+---------------------------------------------------------+

diff --git a/scripts/maintenance/update_linktrails.py b/scripts/maintenance/update_linktrails.py
deleted file mode 100755
index abfeddd..0000000
--- a/scripts/maintenance/update_linktrails.py
+++ /dev/null
@@ -1,172 +0,0 @@
-#!/usr/bin/python3
-"""Script that updates the linktrails in family.py file.
-
-linktrails contains a regex for each site code which holds letters that
-can follow a wikilink and are regarded as part of this link. This depends
-on the linktrail setting in LanguageXx.php. This maintenance script
-retrieves the site settings from wikipedia family and updates the Family
-linktrails dict.
-"""
-#
-# (C) Pywikibot team, 2017-2021
-#
-# Distributed under the terms of the MIT license.
-#
-
-import codecs
-import re
-from contextlib import closing
-from os.path import join
-
-import pywikibot
-from pywikibot.family import CODE_CHARACTERS
-from pywikibot.tools import suppress_warnings
-
-
-def format_string(code: str, pattern: str) -> str:
-    """Format a single pattern line."""
-    fmt = ' ' * 8 + "'{}': {!r}"
-    code_len = len(code)
-    pattern_len = len(pattern)
-
-    if pattern_len > 64 - code_len:
-        index = pattern_len // 2
-        result = fmt.format(code, pattern[:index]) + '\n'
-        result += ' ' * (code_len + 12) + repr(pattern[index:])
-    else:
-        result = fmt.format(code, pattern)
-
-    result += ',\n'
-    # convert escape sequences of unprintable characters to unicode
-    result = re.sub(r'\\u([a-f0-9]{4})',
-                    lambda match: chr(int(match.group(1), 16)), result)
-
-    return result
-
-
-def coroutine(func):
-    """Decorator which starts coroutine."""
-    def start(*args, **kwargs):
-        cr = func(*args, **kwargs)
-        cr.send(None)
-        return cr
-    return start
-
-
-@coroutine
-def update_sites(fam):
-    """Process linktrail for a given site code."""
-    formatter = update_line()
-    while True:
-        code = yield
-
-        with suppress_warnings(
-            'Site wikipedia:[{}]+ instantiated using different code'
-            .format(CODE_CHARACTERS),
-            category=UserWarning,
-                filename=r'.+update_linktrails\.py'):
-            site = pywikibot.Site(code, 'wikipedia')
-
-        if isinstance(site, pywikibot.site.RemovedSite):
-            continue
-
-        if site.code != code:
-            pywikibot.output('"{}" is redirected to "{}"; skipping.'
-                             .format(code, site.code))
-            continue
-
-        linktrail = site.siteinfo.get('general', expiry=True)['linktrail']
-        oldtrail = fam.linktrails.get(code)
-        formatter.send((code, oldtrail, linktrail))
-
-
-@coroutine
-def update_line():
-    """Format linktrail for family file."""
-    writer = update_family_file()
-    matcher = update_matched_line(writer)
-    while True:
-        code, old, linktrail = yield
-        line = format_string(code, old) if old else ''
-
-        if not linktrail:
-            writer.send(line)
-            continue
-
-        if linktrail == '/^()(.*)$/sD':  # empty linktrail
-            line = format_string(code, '')
-            writer.send(line)
-            continue
-
-        match = re.search(
-            r'\((?:\:\?|\?\:)?\[(?P<pattern>.+?)\]'
-            r'(?P<letters>(\|.)*)\)?\+\)',
-            linktrail)
-
-        if not match:
-            pywikibot.output('"{}": No pattern found in "{}"'
-                             .format(code, linktrail))
-            writer.send(line)
-            continue
-
-        matcher.send((code, old, match))
-
-
-@coroutine
-def update_matched_line(writer):
-    """Update matched linktrail."""
-    while True:
-        code, old, match = yield
-        pattern = match.group('pattern')
-        letters = match.group('letters')
-        if pattern == 'a-z' and not letters:  # default
-            if old:
-                pywikibot.output('"{}" has default linktrail; '
-                                 'removing {}'.format(code, old))
-            continue
-
-        if r'x{' in pattern:
-            # replace unicode escape string by corresponding char
-            pattern = re.sub(
-                r'\\x\{([A-F0-9]{4})\}',
-                lambda match: chr(int(match.group(1), 16)),
-                pattern)
-
-        if letters:
-            pattern += ''.join(letters.split('|'))
-
-        new = '[{}]*'.format(pattern)
-        line = format_string(code, new)
-        writer.send(line)
-
-
-@coroutine
-def update_family_file():
-    """Collect linktrails and write them to family.py."""
-    text = "    linktrails = {\n        '_default': '[a-z]*',\n"
-    try:
-        while True:
-            text += yield
-    except GeneratorExit:
-        text += '    }'
-        # write linktrails to family file
-        pywikibot.output('Writing family file...')
-        family_file_name = join('pywikibot', 'family.py')
-        with codecs.open(family_file_name, 'r', 'utf8') as family_file:
-            family_text = family_file.read()
-        family_text = re.sub(r'(?ms)^ {4}linktrails.+?\}',
-                             text, family_text, 1)
-        with codecs.open(family_file_name, 'w', 'utf8') as family_file:
-            family_file.write(family_text)
-
-
-def update_linktrails(family):
-    """Update linktrails for given family."""
-    with closing(update_sites(family)) as updater:
-        for code in sorted(family.langs):
-            updater.send(code)
-
-
-if __name__ == '__main__':
-    site = pywikibot.Site('en', 'wikipedia')
-    update_linktrails(site.family)
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 2c0f0e3..4698341 100755
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -3260,6 +3260,67 @@
         self.assertFalse(site.sametitle('Invalid:Foo', 'Invalid:foo'))


+class TestLinktrails(TestCase):
+
+    """Test linktrail method."""
+
+    family = 'wikipedia'
+    code = 'test'
+
+    def test_has_linktrail(self):
+        """Verify that every code has a linktrail.
+
+        Test all smallest wikis and the others randomly.
+        """
+        size = 20
+        small_wikis = self.site.family.languages_by_size[-size:]
+        great_wikis = self.site.family.languages_by_size[:-size]
+        random.shuffle(great_wikis)
+        great_wikis = great_wikis[:size]
+        for code in sorted(small_wikis + great_wikis):
+            site = pywikibot.Site(code, self.family)
+            with self.subTest(site=site):
+                self.assertIsInstance(site.linktrail(), str)
+
+    def test_linktrails(self):
+        """Test special linktrails.
+
+        This is a subset of the old `family.linktrails` dict.
+        """
+        linktrails = {
+            'ami': '',
+            'bug': '[a-z]*',
+            'ca': "(?:[a-zàèéíòóúç·ïü]|'(?!'))*",
+            'da': '[a-zæøå]*',
+            'ext': '[a-záéíóúñ]*',
+            'fa': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
+            'gu': '[઀-૿]*',
+            'he': '[a-zא-ת]*',
+            'ii': '',
+            'jv': '[a-z]*',
+            'kaa': "(?:[a-zıʼ’“»]|'(?!'))*",
+            'lez': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
+            'mai': '[a-zऀ-ॣ०-꣠-ꣿ]*',
+            'nds-nl': '[a-zäöüïëéèà]*',
+            'or': '[a-z଀-୿]*',
+            'pt': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
+            'qu': '[a-záéíóúñ]*',
+            'roa-rup': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
+            'sa': '[a-zऀ-ॣ०-꣠-ꣿ]*',
+            'te': '[ఁ-౯]*',
+            'uz': '[a-zʻʼ“»]*',
+            'vec': '[a-zàéèíîìóòúù]*',
+            'wuu': '',
+            'xmf': '[a-zაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ“»]*',
+            'yi': '[a-zא-ת]*',
+            'zh-cn': ''
+        }
+        for code, linktrail in linktrails.items():
+            site = pywikibot.Site(code, self.family)
+            with self.subTest(site=site):
+                self.assertEqual(site.linktrail(), linktrail)
+
+
 class TestObsoleteSite(DefaultSiteTestCase):

     """Test 'closed' and obsolete code sites."""
diff --git a/tests/utils.py b/tests/utils.py
index 35f3372..160d703 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -342,6 +342,10 @@
                     author_ns, 'Author', case=self.siteinfo['case'])
         return ns_dict

+    def linktrail(self):
+        """Return default linktrail."""
+        return '[a-z]*'
+
     @property
     def userinfo(self):
         """Return dry data."""

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/579877
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ie12ddb65f2ed9a9d520b39a4c372d3ee5d9f6309
Gerrit-Change-Number: 579877
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Meno25 <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to