jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1239517?usp=email )

Change subject: Fix: use http.request for the parse action instead of session.post
......................................................................

Fix: use http.request for the parse action instead of session.post

session.post() does not set the User-Agent header automatically, so
requests may be declined.
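
For illustration, a minimal sketch of the new request path (mirroring
the diff; the sample wikitext here is made up). simple_request() builds
an API request whose HTTP call goes through pywikibot.comms.http, which
supplies the configured Pywikibot User-Agent automatically:

    from pywikibot import Site

    site = Site('fr', 'wiktionary')
    # Parse a wikitext snippet server-side; the User-Agent header is
    # set by pywikibot's http layer, unlike a bare requests session.
    req = site.simple_request(action='parse', text='[[MediaWiki:a]]',
                              contentmodel='wikitext', prop='text')
    data = req.submit()
    html = data['parse']['text']['*']  # rendered HTML of the parsed text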

Bug: T417495
Change-Id: I8368be804e828c0b075656b1d660345cc64a2f85
---
M scripts/maintenance/unidata.py
1 file changed, 25 insertions(+), 22 deletions(-)

Approvals:
  Meno25: Looks good to me, but someone else must approve
  jenkins-bot: Verified
  Xqt: Looks good to me, approved

diff --git a/scripts/maintenance/unidata.py b/scripts/maintenance/unidata.py
index 67dbd46..de58b53 100755
--- a/scripts/maintenance/unidata.py
+++ b/scripts/maintenance/unidata.py
@@ -23,8 +23,8 @@
 from threading import Thread

 from pywikibot import Site
-from pywikibot.comms.http import session
 from pywikibot.family import Family
+from pywikibot.tools._unidata import _first_upper_exception_dict


 NUMBER_OF_THREADS = 26
@@ -45,38 +45,39 @@

 def chars_uppers_wikilinks():
     """Retrieve upper chars from MediaWiki using page titles."""
-    n = 0
     chars = []
     uppers = []
-    wikilinks = ''
-    for i in range(maxunicode + 1):
+    wikilinks = []
+    for i in range(32, maxunicode + 1):
         c = chr(i)
         uc = c.upper()
-        if uc != c:
-            n += 1
+        if uc != c or c in _first_upper_exception_dict:
             chars.append(c)
             uppers.append(uc)
             # MediaWiki is first-letter case
-            wikilinks += '[[MediaWiki:' + c + ']]\n'
-    return chars, uppers, wikilinks
+            wikilinks.append(f'[[MediaWiki:{c}]]')
+    return chars, uppers, '\n'.join(wikilinks)


 def process_site(fam_name, site_code):
     """Process title for a single site."""
-    j = session.post(
-        f'https://{site_code}.{fam_name}.org/w/api.php?'
-        f'action=parse&contentmodel=wikitext&prop=text'
-        f'&format=json&utf8',
-        data={'text': wikilinks},
-        timeout=10,
-    ).json()
-    parsed_text = j['parse']['text']['*']
+    site = Site(site_code, fam_name)
+    result = site.simple_request(
+        action='parse',
+        text=wikilinks,
+        contentmodel='wikitext',
+        prop='text'
+    ).submit()
+
+    parsed_text = result['parse']['text']['*']
     titles = findall(r'title="[^:]*:(.)', parsed_text)
-    site_excepts = {}
-    for i, original_char in enumerate(chars):
-        title_char = titles[i]
-        if uppers[i] != title_char:
-            site_excepts[original_char] = title_char
+
+    site_excepts = {
+        orig: title
+        for orig, upper, title in zip(chars, uppers, titles)
+        if upper != title
+    }
+
     return site_excepts


@@ -162,4 +163,6 @@
     # families_excepts = load_json(FILEPATH)
     # main()
     # save_json(families_excepts, FILEPATH)
-    print(process_site('wiktionary', 'fr'))  # noqa: T201
+    mapping = process_site('wiktionary', 'fr')
+    print(len(mapping), 'entries found')  # noqa: T201
+    print(mapping)  # noqa: T201

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1239517?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I8368be804e828c0b075656b1d660345cc64a2f85
Gerrit-Change-Number: 1239517
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Meno25 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot