zhuyifei1999 added a comment.

In an attempt to isolate the cause of the memory leak, I applied the following on top of the above patch:

diff --git a/pywikibot/comms/http.py b/pywikibot/comms/http.py
index 76878ae..168b4a3 100644
--- a/pywikibot/comms/http.py
+++ b/pywikibot/comms/http.py
@@ -381,7 +381,7 @@ def _http_process(session, http_request):
         # Note that the connections are pooled which mean that a future
         # HTTPS request can succeed even if the certificate is invalid and
         # verify=True, when a request with verify=False happened before
-        response = session.request(method, uri, params=params, data="">
+        response = requests.request(method, uri, params=params, data="">
                                    headers=headers, auth=auth, timeout=timeout,
                                    verify=not ignore_validation,
                                    **http_request.kwargs)
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 3c96270..96d9897 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -604,6 +604,7 @@ class LinkCheckThread(threading.Thread):
             header = self.header
             r = comms.http.fetch(
                 self.url, headers=header,
+                method="HEAD",
                 use_fake_user_agent=self._use_fake_user_agent)
         except requests.exceptions.InvalidURL:
             message = i18n.twtranslate(self.page.site,
@@ -728,6 +729,7 @@ class History(object):
 
     def setLinkDead(self, url, error, page, weblink_dead_days):
         """Add the fact that the link was found dead to the .dat file."""
+        return self.save()
         with self.semaphore:
             now = time.time()
             if url in self.historyDict:
@@ -1079,6 +1081,18 @@ def main(*args):
         gen = pagegenerators.RedirectFilterPageGenerator(gen)
         bot = WeblinkCheckerRobot(gen, HTTPignore, config.weblink_dead_days)
         try:
+            import signal
+
+            def on_interactreq(signum, frame):
+                with bot.history.semaphore:
+                    import code
+                    hpy = None
+                    # from guppy import hpy
+                    l = {'bot': bot, 'hpy': hpy}
+                    l.update(globals())
+                    l.update(locals())
+                    code.interact(local=l)
+            signal.signal(signal.SIGUSR1, on_interactreq)
             bot.run()
         finally:
             waitTime = 0

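For context on the first hunk: requests.request() builds a throwaway Session internally and closes it when the call returns, whereas pywikibot normally reuses one pooled session, so the swap is presumably there to check whether the pooled connections are part of what keeps growing. A rough illustration of the two paths (standalone sketch, not part of the patch; example.org is just a placeholder):

import requests

# Pooled path: one long-lived Session keeps connections and per-host state around
session = requests.Session()
r1 = session.request('HEAD', 'https://example.org', timeout=10)

# Unpooled path: requests.request() creates a fresh Session for this one call
# and closes it on return, so nothing accumulates between requests
r2 = requests.request('HEAD', 'https://example.org', timeout=10)
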
With this applied, the increase in memory usage nearly halts just above 220 MB. I will check the contribution of each change next.
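
For the record, the SIGUSR1 handler in the last hunk opens an interactive console while holding bot.history.semaphore, so anything else waiting on that semaphore blocks during inspection. A minimal sketch of how it can be driven (assuming guppy is installed; in the patch above the import stays commented out and hpy is None):

# From a shell, signal the running weblinkchecker process:
#   kill -USR1 <pid>

# Then, inside the code.interact() prompt that opens:
from guppy import hpy                  # heap profiler, only if guppy is available
h = hpy()
heap = h.heap()                        # snapshot of all GC-reachable objects
print(heap)                            # total size, grouped by object type
print(heap.byrcs)                      # same objects, grouped by referrer pattern
print(len(bot.history.historyDict))    # dead-link records currently held in memory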


TASK DETAIL
https://phabricator.wikimedia.org/T185561
