jenkins-bot has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1239517?usp=email )
Change subject: Fix: use http.request for parse section instead of session.post
......................................................................
Fix: use http.request for parse section instead of session.post
session.post() does not set the User-agent automatically, so requests
may be declined.
Bug: T417495
Change-Id: I8368be804e828c0b075656b1d660345cc64a2f85
---
M scripts/maintenance/unidata.py
1 file changed, 25 insertions(+), 22 deletions(-)
Approvals:
Meno25: Looks good to me, but someone else must approve
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/scripts/maintenance/unidata.py b/scripts/maintenance/unidata.py
index 67dbd46..de58b53 100755
--- a/scripts/maintenance/unidata.py
+++ b/scripts/maintenance/unidata.py
@@ -23,8 +23,8 @@
from threading import Thread
from pywikibot import Site
-from pywikibot.comms.http import session
from pywikibot.family import Family
+from pywikibot.tools._unidata import _first_upper_exception_dict
NUMBER_OF_THREADS = 26
@@ -45,38 +45,39 @@
def chars_uppers_wikilinks():
"""Retrieve upper chars from MediaWiki using page titles."""
- n = 0
chars = []
uppers = []
- wikilinks = ''
- for i in range(maxunicode + 1):
+ wikilinks = []
+ for i in range(32, maxunicode + 1):
c = chr(i)
uc = c.upper()
- if uc != c:
- n += 1
+ if uc != c or c in _first_upper_exception_dict:
chars.append(c)
uppers.append(uc)
# MediaWiki is first-letter case
- wikilinks += '[[MediaWiki:' + c + ']]\n'
- return chars, uppers, wikilinks
+ wikilinks.append(f'[[MediaWiki:{c}]]')
+ return chars, uppers, '\n'.join(wikilinks)
def process_site(fam_name, site_code):
"""Process title for a single site."""
- j = session.post(
- f'https://{site_code}.{fam_name}.org/w/api.php?'
- f'action=parse&contentmodel=wikitext&prop=text'
- f'&format=json&utf8',
- data={'text': wikilinks},
- timeout=10,
- ).json()
- parsed_text = j['parse']['text']['*']
+ site = Site(site_code, fam_name)
+ result = site.simple_request(
+ action='parse',
+ text=wikilinks,
+ contentmodel='wikitext',
+ prop='text'
+ ).submit()
+
+ parsed_text = result['parse']['text']['*']
titles = findall(r'title="[^:]*:(.)', parsed_text)
- site_excepts = {}
- for i, original_char in enumerate(chars):
- title_char = titles[i]
- if uppers[i] != title_char:
- site_excepts[original_char] = title_char
+
+ site_excepts = {
+ orig: title
+ for orig, upper, title in zip(chars, uppers, titles)
+ if upper != title
+ }
+
return site_excepts
@@ -162,4 +163,6 @@
# families_excepts = load_json(FILEPATH)
# main()
# save_json(families_excepts, FILEPATH)
- print(process_site('wiktionary', 'fr')) # noqa: T201
+ mapping = process_site('wiktionary', 'fr')
+ print(len(mapping), 'entries found') # noqa: T201
+ print(mapping) # noqa: T201
--
To view, visit
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1239517?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I8368be804e828c0b075656b1d660345cc64a2f85
Gerrit-Change-Number: 1239517
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Meno25 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]