This is an automated email from the ASF dual-hosted git repository.
brondsem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/allura.git
The following commit(s) were added to refs/heads/master by this push:
new a484f5056 [#8525] urlopen cleanup
a484f5056 is described below
commit a484f5056e02b452eb09255b6d887cfc9715f2fb
Author: Dave Brondsema <[email protected]>
AuthorDate: Fri Nov 3 10:42:49 2023 -0400
[#8525] urlopen cleanup
---
Allura/allura/lib/helpers.py | 44 ++++++++++++++++++------
Allura/allura/tests/test_helpers.py | 22 ++++++------
Allura/development.ini | 3 ++
ForgeImporters/forgeimporters/base.py | 16 ++++-----
ForgeImporters/forgeimporters/tests/test_base.py | 34 ++++++++++++++++--
5 files changed, 86 insertions(+), 33 deletions(-)
diff --git a/Allura/allura/lib/helpers.py b/Allura/allura/lib/helpers.py
index 9e9f8b294..c822d9e00 100644
--- a/Allura/allura/lib/helpers.py
+++ b/Allura/allura/lib/helpers.py
@@ -14,6 +14,8 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations
+
import base64
import sys
import os
@@ -21,9 +23,9 @@ import os.path
import difflib
import jinja2
-import six.moves.urllib.request
-import six.moves.urllib.parse
-import six.moves.urllib.error
+import urllib.request
+import urllib.parse
+import urllib.error
import re
import unicodedata
import json
@@ -65,6 +67,7 @@ from webob.exc import HTTPUnauthorized
from allura.lib import exceptions as exc
from allura.lib import utils
+from allura.lib import validators
import urllib.parse as urlparse
from urllib.parse import urlencode
import math
@@ -201,16 +204,16 @@ def monkeypatch(*objs):
def urlquote(url, safe=b"/"):
try:
- return six.moves.urllib.parse.quote(str(url), safe=safe)
+ return urllib.parse.quote(str(url), safe=safe)
except UnicodeEncodeError:
- return six.moves.urllib.parse.quote(url.encode('utf-8'), safe=safe)
+ return urllib.parse.quote(url.encode('utf-8'), safe=safe)
def urlquoteplus(url, safe=b""):
try:
- return six.moves.urllib.parse.quote_plus(str(url), safe=safe)
+ return urllib.parse.quote_plus(str(url), safe=safe)
except UnicodeEncodeError:
- return six.moves.urllib.parse.quote_plus(url.encode('utf-8'),
- safe=safe)
+ return urllib.parse.quote_plus(url.encode('utf-8'), safe=safe)
def urlquote_path_only(url):
@@ -1027,7 +1030,18 @@ class exceptionless:
return inner
-def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=None):
+class NoRedirectToInternal(urllib.request.HTTPRedirectHandler):
+ def redirect_request(self, req: urllib.request.Request, fp, code, msg, headers, newurl: str):
+ if not asbool(tg.config.get('urlopen_allow_internal_hostnames', 'false')):
+ validators.URLIsPrivate().to_python(newurl, None)
+ return super().redirect_request(req, fp, code, msg, headers, newurl)
+
+
+opener = urllib.request.build_opener(NoRedirectToInternal)
+urllib.request.install_opener(opener)
+
+
+def urlopen(url: str | urllib.request.Request, retries=3, codes=(408, 500, 502, 503, 504), timeout=None):
"""Open url, optionally retrying if an error is encountered.
Socket and other IO errors will always be retried if retries > 0.
@@ -1037,12 +1051,22 @@ def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=None):
:param codes: HTTP error codes that should be retried.
"""
+ if isinstance(url, urllib.request.Request):
+ url_str = url.full_url
+ else:
+ url_str = url
+ if not url_str.startswith(('http://', 'https://')):
+ raise ValueError(f'URL must be http(s), got {url_str}')
+ if not asbool(tg.config.get('urlopen_allow_internal_hostnames', 'false')):
+ # will raise error if hostname resolves to private address space:
+ validators.URLIsPrivate().to_python(url_str, None)
+
attempts = 0
while True:
try:
- return six.moves.urllib.request.urlopen(url, timeout=timeout)
+ return urllib.request.urlopen(url, timeout=timeout)
except OSError as e:
- no_retry = isinstance(e, six.moves.urllib.error.HTTPError) and e.code not in codes
+ no_retry = isinstance(e, urllib.error.HTTPError) and e.code not in codes
if attempts < retries and not no_retry:
attempts += 1
continue
diff --git a/Allura/allura/tests/test_helpers.py b/Allura/allura/tests/test_helpers.py
index 9db093a81..9a12062bd 100644
--- a/Allura/allura/tests/test_helpers.py
+++ b/Allura/allura/tests/test_helpers.py
@@ -454,13 +454,13 @@ back\\\-slash escaped
class TestUrlOpen(TestCase):
- @patch('six.moves.urllib.request.urlopen')
+ @patch('urllib.request.urlopen')
def test_no_error(self, urlopen):
- r = h.urlopen('myurl')
+ r = h.urlopen('http://example.com')
self.assertEqual(r, urlopen.return_value)
- urlopen.assert_called_once_with('myurl', timeout=None)
+ urlopen.assert_called_once_with('http://example.com', timeout=None)
- @patch('six.moves.urllib.request.urlopen')
+ @patch('urllib.request.urlopen')
def test_socket_timeout(self, urlopen):
import socket
@@ -468,10 +468,10 @@ class TestUrlOpen(TestCase):
raise socket.timeout()
urlopen.side_effect = side_effect
- self.assertRaises(socket.timeout, h.urlopen, 'myurl')
+ self.assertRaises(socket.timeout, h.urlopen, 'http://example.com')
self.assertEqual(urlopen.call_count, 4)
- @patch('six.moves.urllib.request.urlopen')
+ @patch('urllib.request.urlopen')
def test_socket_reset(self, urlopen):
import socket
import errno
@@ -480,10 +480,10 @@ class TestUrlOpen(TestCase):
raise OSError(errno.ECONNRESET, 'Connection reset by peer')
urlopen.side_effect = side_effect
- self.assertRaises(socket.error, h.urlopen, 'myurl')
+ self.assertRaises(socket.error, h.urlopen, 'http://example.com')
self.assertEqual(urlopen.call_count, 4)
- @patch('six.moves.urllib.request.urlopen')
+ @patch('urllib.request.urlopen')
def test_handled_http_error(self, urlopen):
from urllib.error import HTTPError
@@ -491,10 +491,10 @@ class TestUrlOpen(TestCase):
raise HTTPError('url', 408, 'timeout', None, io.BytesIO())
urlopen.side_effect = side_effect
- self.assertRaises(HTTPError, h.urlopen, 'myurl')
+ self.assertRaises(HTTPError, h.urlopen, 'http://example.com')
self.assertEqual(urlopen.call_count, 4)
- @patch('six.moves.urllib.request.urlopen')
+ @patch('urllib.request.urlopen')
def test_unhandled_http_error(self, urlopen):
from urllib.error import HTTPError
@@ -502,7 +502,7 @@ class TestUrlOpen(TestCase):
raise HTTPError('url', 404, 'timeout', None, io.BytesIO())
urlopen.side_effect = side_effect
- self.assertRaises(HTTPError, h.urlopen, 'myurl')
+ self.assertRaises(HTTPError, h.urlopen, 'http://example.com')
self.assertEqual(urlopen.call_count, 1)
diff --git a/Allura/development.ini b/Allura/development.ini
index 7e3850d50..0f93fcc41 100644
--- a/Allura/development.ini
+++ b/Allura/development.ini
@@ -471,6 +471,9 @@ bulk_export_download_instructions = Sample instructions for {project}
importer_upload_path = /tmp/importer_upload/{nbhd}/{project}
+; If you want to allow importing resources from internally resolving hostnames, enable this:
+;urlopen_allow_internal_hostnames = false
+
; To disable any plugin, tool, importer, etc from being available, you can use the disable_entry_points config option.
; Specify the keys and values as they are declared in the tool's "setup.py" file.
; Examples:
diff --git a/ForgeImporters/forgeimporters/base.py b/ForgeImporters/forgeimporters/base.py
index f21539c29..bd07200c5 100644
--- a/ForgeImporters/forgeimporters/base.py
+++ b/ForgeImporters/forgeimporters/base.py
@@ -20,15 +20,13 @@ import errno
import logging
from io import BytesIO
-import six.moves.urllib.request
-import six.moves.urllib.parse
-import six.moves.urllib.error
+import urllib.request
+import urllib.parse
from collections import defaultdict
import traceback
from urllib.parse import urlparse
from urllib.parse import unquote
from datetime import datetime
-import six
from bs4 import BeautifulSoup
from tg import expose, validate, flash, redirect, config
@@ -159,17 +157,17 @@ class ProjectExtractor:
PAGE_MAP = {}
- def __init__(self, project_name, page_name=None, **kw):
+ def __init__(self, project_name, page_name_or_url=None, **kw):
self.project_name = project_name
self._page_cache = {}
self.url = None
self.page = None
- if page_name:
- self.get_page(page_name, **kw)
+ if page_name_or_url:
+ self.get_page(page_name_or_url, **kw)
@staticmethod
def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=120, unredirected_hdrs=None, **kw):
- req = six.moves.urllib.request.Request(url, **kw)
+ req = urllib.request.Request(url, **kw)
if unredirected_hdrs:
for key, val in unredirected_hdrs.items():
req.add_unredirected_header(key, val)
@@ -211,7 +209,7 @@ class ProjectExtractor:
"""
return self.PAGE_MAP[page_name].format(
- project_name=six.moves.urllib.parse.quote(self.project_name), **kw)
+ project_name=urllib.parse.quote(self.project_name), **kw)
def parse_page(self, page):
"""Transforms the result of a `urlopen` call before returning it from
diff --git a/ForgeImporters/forgeimporters/tests/test_base.py b/ForgeImporters/forgeimporters/tests/test_base.py
index fe3d18d36..07672023c 100644
--- a/ForgeImporters/forgeimporters/tests/test_base.py
+++ b/ForgeImporters/forgeimporters/tests/test_base.py
@@ -34,16 +34,44 @@ from forgeimporters import base
class TestProjectExtractor(TestCase):
@mock.patch('forgeimporters.base.h.urlopen')
- @mock.patch('six.moves.urllib.request.Request')
+ @mock.patch('urllib.request.Request')
def test_urlopen(self, Request, urlopen):
- r = base.ProjectExtractor.urlopen('myurl', data='foo')
- Request.assert_called_once_with('myurl', data='foo')
+ r = base.ProjectExtractor.urlopen('https://example.com/asdf', data='foo')
+ Request.assert_called_once_with('https://example.com/asdf', data='foo')
req = Request.return_value
req.add_header.assert_called_once_with(
'User-Agent', 'Allura Data Importer (https://allura.apache.org/)')
urlopen.assert_called_once_with(req, retries=3, codes=(408, 500, 502, 503, 504), timeout=120)
self.assertEqual(r, urlopen.return_value)
+ def test_urlopen_invalid(self):
+ with pytest.raises(ValueError):
+ base.ProjectExtractor.urlopen('myurl', data='foo')
+ with pytest.raises(ValueError):
+ base.ProjectExtractor.urlopen('file:///etc/passwd', data='foo')
+ with pytest.raises(ValueError):
+ base.ProjectExtractor.urlopen('ftp://something.com', data='foo')
+
+ def test_urlopen_internal_blocked(self):
+ # by default this is invalid
+ with pytest.raises(Invalid):
+ base.ProjectExtractor.urlopen('http://localhost:1234/blah', data='foo')
+
+ # redirect to external site ok
+ base.ProjectExtractor.urlopen('https://httpbin.org/redirect-to?url=http%3A%2F%2Fexample.com%2F')
+
+ # redirect to internal is blocked
+ with pytest.raises(Invalid):
+ base.ProjectExtractor.urlopen('https://httpbin.org/redirect-to?url=http%3A%2F%2Flocalhost%2F')
+
+ @mock.patch('forgeimporters.base.h.urlopen')
+ @mock.patch('urllib.request.Request')
+ def test_urlopen_internal_ok(self, Request, urlopen):
+ # does NOT raise Invalid
+ with mock.patch.dict(base.config, {'urlopen_allow_internal_hostnames': 'true'}):
+ base.ProjectExtractor.urlopen('http://localhost:1234/blah', data='foo')
+ Request.assert_called_once_with('http://localhost:1234/blah', data='foo')
+
@mock.patch.object(base, 'datetime')
@mock.patch.object(base, 'M')