jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/817748 )

Change subject: [IMPR] Speed up archivebot.py a bit
......................................................................

[IMPR] Speed up archivebot.py a bit

- return early in DiscussionPage.is_full
- reorder conditions to evaluate the most frequently used first
- in get_archive_page, return the cached page from archives if present
- don't walk the counter down and back up if the first archive was already found
- deleting local values is not necessary

Change-Id: Idfd271f32020a5f2c56557641f3331d6e3db7233
---
M scripts/archivebot.py
1 file changed, 25 insertions(+), 15 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 6c73eca..8b3100d 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -407,10 +407,14 @@
 
     def is_full(self, max_archive_size: Size) -> bool:
         """Check whether archive size exceeded."""
+        if self.full:
+            return True
+
         size, unit = max_archive_size
-        if (self.size() > self.archiver.maxsize
-            or unit == 'B' and self.size() >= size
-                or unit == 'T' and len(self.threads) >= size):
+        self_size = self.size()
+        if (unit == 'B' and self_size >= size
+            or unit == 'T' and len(self.threads) >= size
+                or self_size > self.archiver.maxsize):
             self.full = True  # xxx: this is one-way flag
         return self.full

@@ -584,21 +588,21 @@
         return None

     def get_archive_page(self, title: str, params=None) -> DiscussionPage:
-        """
-        Return the page for archiving.
+        """Return the page for archiving.

         If it doesn't exist yet, create and cache it.
         Also check for security violations.
         """
-        page_title = self.page.title()
-        archive = pywikibot.Page(self.site, title)
-        if not (self.force or title.startswith(page_title + '/')
-                or self.key_ok()):
-            raise ArchiveSecurityError(
-                'Archive page {} does not start with page title ({})!'
-                .format(archive, page_title))
         if title not in self.archives:
-            self.archives[title] = DiscussionPage(archive, self, params)
+            page_title = self.page.title()
+            archive_link = pywikibot.Link(title, self.site)
+            if not (title.startswith(page_title + '/') or self.force
+                    or self.key_ok()):
+                raise ArchiveSecurityError(
+                    'Archive page {} does not start with page title ({})!'
+                    .format(archive_link, page_title))
+            self.archives[title] = DiscussionPage(archive_link, self, params)
+
         return self.archives[title]

     def get_params(self, timestamp, counter: int) -> dict:
@@ -666,7 +670,6 @@
         params = self.get_params(self.now, counter)
         aux_params = self.get_params(self.now, counter + 1)
         counter_matters = (pattern % params) != (pattern % aux_params)
-        del params, aux_params

         # we need to start with the oldest archive since that is
         # the one the saved counter applies to, so sort the groups
@@ -680,6 +683,7 @@
             # 1. it matters (AND)
             # 2. "era" (year, month, etc.) changes (AND)
             # 3. there is something to put to the new archive.
+            counter_found = False
             for i, thread in group:
                 threads_left = len(self.page.threads) - self.archived_threads
                 if threads_left <= int(self.get_attr('minthreadsleft', 5)):
@@ -694,7 +698,8 @@
                 archive = self.get_archive_page(pattern % params, params)

                 if counter_matters:
-                    while counter > 1 and not archive.exists():
+                    while not counter_found and counter > 1 \
+                            and not archive.exists():
                         # This may happen when either:
                         # 1. a previous version of the bot run and reset
                         #    the counter without archiving anything
@@ -707,6 +712,10 @@
                         params = self.get_params(thread.timestamp, counter)
                         archive = self.get_archive_page(
                             pattern % params, params)
+                    else:
+                        # There are only non existing pages found by count down
+                        counter_found = True
+
                     while archive.is_full(max_arch_size):
                         counter += 1
                         params = self.get_params(thread.timestamp, counter)
@@ -715,6 +724,7 @@

                 archive.feed_thread(thread, max_arch_size)
                 self.archived_threads += 1
+
             if counter_matters:
                 era_change = True


--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/817748
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Idfd271f32020a5f2c56557641f3331d6e3db7233
Gerrit-Change-Number: 817748
Gerrit-PatchSet: 6
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: MarcoAurelio <[email protected]>
Gerrit-CC: Matěj Suchánek <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to