jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/817190 )

Change subject: [IMPR] process pages in parallel tasks with -async option
......................................................................

[IMPR] process pages in parallel tasks with -async option

This implementation is still experimental. Processing is about
7 times faster, depending on the number of CPU cores.

Note: The script cannot be interrupted after it is started with this option.

Change-Id: I530640292c38890595197441f480f17b82fa4254
---
M scripts/archivebot.py
1 file changed, 18 insertions(+), 5 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index c0e2f37..13f250e 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -105,13 +105,15 @@
   -keep           Preserve thread order in archive even if threads are
                   archived later
   -sort           Sort archive by timestamp; should not be used with -keep
+  -async          Run the bot in parallel tasks. This is experimental
+                  and the bot cannot be stopped with KeyboardInterrupt

 .. versionchanged:: 7.6
    Localized variables for "archive" template parameter are supported.
    `User:MiszaBot/config` is the default template. `-keep` option was
    added.
 .. versionchanged:: 7.7
-   `-sort` option was added.
+   `-sort` and `-async` options were added.
 """
 #
 # (C) Pywikibot team, 2006-2022
@@ -124,6 +126,7 @@
 import re
 import time
 from collections import OrderedDict, defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from hashlib import md5
 from math import ceil
 from textwrap import fill
@@ -132,7 +135,7 @@

 import pywikibot
 from pywikibot import i18n
-from pywikibot.backports import List, Set, Tuple, pairwise
+from pywikibot.backports import List, Set, Tuple, nullcontext, pairwise
 from pywikibot.exceptions import Error, NoPageError
 from pywikibot.textlib import (
     TimeStripper,
@@ -850,6 +853,7 @@
     calc = None
     keep = False
     sort = False
+    asyncronous = False
     templates = []

     local_args = pywikibot.handle_args(args)
@@ -883,6 +887,8 @@
             keep = True
         elif option == 'sort':
             sort = True
+        elif option == 'async':
+            asyncronous = True

     site = pywikibot.Site()

@@ -913,15 +919,22 @@
         elif pagename:
             gen = [pywikibot.Page(site, pagename, ns=3)]
         else:
+
             ns = [str(namespace)] if namespace is not None else []
             pywikibot.output('Fetching template transclusions...')
             gen = tmpl.getReferences(only_template_inclusion=True,
                                      follow_redirects=False,
                                      namespaces=ns,
                                      content=True)
-        for pg in gen:
-            if not process_page(pg, tmpl, salt, force, keep, sort):
-                return
+
+        botargs = tmpl, salt, force, keep, sort
+        context = ThreadPoolExecutor if asyncronous else nullcontext
+        with context() as executor:
+            for pg in gen:
+                if asyncronous:
+                    executor.submit(process_page, pg, *botargs)
+                elif not process_page(pg, *botargs):
+                    return


 if __name__ == '__main__':

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/817190
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I530640292c38890595197441f480f17b82fa4254
Gerrit-Change-Number: 817190
Gerrit-PatchSet: 11
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: PotsdamLamb
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to