This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8523
in repository https://gitbox.apache.org/repos/asf/allura.git
commit 52acb883ea96fa351a66cce584115ac36bd67d79
Author: Dave Brondsema <[email protected]>
AuthorDate: Wed Oct 18 18:24:34 2023 -0400

    [#8523] never proactively sleep after a request (and then request again); only sleep if this request got limited
---
 ForgeImporters/forgeimporters/github/__init__.py   | 12 ++------
 .../forgeimporters/tests/github/test_extractor.py  | 35 +---------------------
 2 files changed, 4 insertions(+), 43 deletions(-)

diff --git a/ForgeImporters/forgeimporters/github/__init__.py b/ForgeImporters/forgeimporters/github/__init__.py
index b7d08b0d8..f14d215f7 100644
--- a/ForgeImporters/forgeimporters/github/__init__.py
+++ b/ForgeImporters/forgeimporters/github/__init__.py
@@ -97,6 +97,7 @@ class GitHubProjectExtractor(base.ProjectExtractor):
         limit = headers.get('X-RateLimit-Limit')
         reset = datetime.utcfromtimestamp(int(reset))
         now = datetime.utcnow()
+        # 60/hour is for GitHub unauthenticated users. If you get that, check your auth tokens
         log.warning('Rate limit exceeded (%s requests/hour). '
                     'Sleeping until %s UTC' % (limit, reset))
         time.sleep((reset - now).total_seconds() + 2)
@@ -117,23 +118,16 @@ class GitHubProjectExtractor(base.ProjectExtractor):
         else:
             unredirected_hdrs = auth_headers
         try:
-            resp = super().urlopen(url, headers=headers, unredirected_hdrs=unredirected_hdrs, **kw)
+            return super().urlopen(url, headers=headers, unredirected_hdrs=unredirected_hdrs, **kw)
         except six.moves.urllib.error.HTTPError as e:
             # GitHub will return 403 if rate limit exceeded.
-            # We're checking for limit on every request below, but we still
-            # can get 403 if other import task exceeds the limit before.
             if e.code == 403 and e.info().get('X-RateLimit-Remaining') == '0':
                 self.wait_for_limit_reset(e.info())
                 # call ourselves to try again:
                 return self.urlopen(url, headers=headers, use_auth_headers_on_redirects=use_auth_headers_on_redirects,
                                     **kw)
             else:
-                raise e
-        remain = resp.info().get('X-RateLimit-Remaining')
-        if remain and int(remain) == 0:
-            self.wait_for_limit_reset(resp.info())
-            return self.urlopen(url, **kw)
-        return resp
+                raise
 
     def check_readable(self):
         url, headers = self.add_token(self.get_page_url('project_info'))
diff --git a/ForgeImporters/forgeimporters/tests/github/test_extractor.py b/ForgeImporters/forgeimporters/tests/github/test_extractor.py
index 3fd6b2f03..e33faf357 100644
--- a/ForgeImporters/forgeimporters/tests/github/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/github/test_extractor.py
@@ -151,44 +151,11 @@ class TestGitHubProjectExtractor(TestCase):
         assert request.headers['User-agent']
         self.assertEqual(request.headers['Authorization'], 'Bearer abc')
 
-    @patch('forgeimporters.base.h.urlopen')
-    @patch('forgeimporters.github.time.sleep')
-    @patch('forgeimporters.github.log')
-    def test_urlopen_rate_limit(self, log, sleep, urlopen):
-        limit_exceeded_headers = {
-            'X-RateLimit-Limit': '10',
-            'X-RateLimit-Remaining': '0',
-            'X-RateLimit-Reset': '1382693522',
-        }
-        response_limit_exceeded = BytesIO(b'{}')
-        response_limit_exceeded.info = lambda: limit_exceeded_headers
-        response_ok = BytesIO(b'{}')
-        response_ok.info = lambda: {}
-        urlopen.side_effect = [response_limit_exceeded, response_ok]
-        e = github.GitHubProjectExtractor('test_project')
-        e.get_page('http://example.com/')
-        self.assertEqual(sleep.call_count, 1)
-        self.assertEqual(urlopen.call_count, 2)
-        log.warning.assert_called_once_with(
-            'Rate limit exceeded (10 requests/hour). '
-            'Sleeping until 2013-10-25 09:32:02 UTC'
-        )
-        sleep.reset_mock()
-        urlopen.reset_mock()
-        log.warning.reset_mock()
-        response_ok = BytesIO(b'{}')
-        response_ok.info = lambda: {}
-        urlopen.side_effect = [response_ok]
-        e.get_page('http://example.com/2')
-        self.assertEqual(sleep.call_count, 0)
-        self.assertEqual(urlopen.call_count, 1)
-        self.assertEqual(log.warning.call_count, 0)
-
     @patch('forgeimporters.base.h.urlopen')
     @patch('forgeimporters.github.time.sleep')
     @patch('forgeimporters.github.log')
     def test_urlopen_rate_limit_403(self, log, sleep, urlopen):
-        '''Test that urlopen catches 403 which may happen if limit exceeded by another task'''
+        '''Test that urlopen catches 403 which may happen if limit exceeded by previous fetches'''
         limit_exceeded_headers = {
             'X-RateLimit-Limit': '10',
             'X-RateLimit-Remaining': '0',
