This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/allura.git


The following commit(s) were added to refs/heads/master by this push:
     new a484f5056 [#8525] urlopen cleanup
a484f5056 is described below

commit a484f5056e02b452eb09255b6d887cfc9715f2fb
Author: Dave Brondsema <[email protected]>
AuthorDate: Fri Nov 3 10:42:49 2023 -0400

    [#8525] urlopen cleanup
---
 Allura/allura/lib/helpers.py                     | 44 ++++++++++++++++++------
 Allura/allura/tests/test_helpers.py              | 22 ++++++------
 Allura/development.ini                           |  3 ++
 ForgeImporters/forgeimporters/base.py            | 16 ++++-----
 ForgeImporters/forgeimporters/tests/test_base.py | 34 ++++++++++++++++--
 5 files changed, 86 insertions(+), 33 deletions(-)

diff --git a/Allura/allura/lib/helpers.py b/Allura/allura/lib/helpers.py
index 9e9f8b294..c822d9e00 100644
--- a/Allura/allura/lib/helpers.py
+++ b/Allura/allura/lib/helpers.py
@@ -14,6 +14,8 @@
 #       KIND, either express or implied.  See the License for the
 #       specific language governing permissions and limitations
 #       under the License.
+from __future__ import annotations
+
 import base64
 import sys
 import os
@@ -21,9 +23,9 @@ import os.path
 import difflib
 import jinja2
 
-import six.moves.urllib.request
-import six.moves.urllib.parse
-import six.moves.urllib.error
+import urllib.request
+import urllib.parse
+import urllib.error
 import re
 import unicodedata
 import json
@@ -65,6 +67,7 @@ from webob.exc import HTTPUnauthorized
 
 from allura.lib import exceptions as exc
 from allura.lib import utils
+from allura.lib import validators
 import urllib.parse as urlparse
 from urllib.parse import urlencode
 import math
@@ -201,16 +204,16 @@ def monkeypatch(*objs):
 
 def urlquote(url, safe=b"/"):
     try:
-        return six.moves.urllib.parse.quote(str(url), safe=safe)
+        return urllib.parse.quote(str(url), safe=safe)
     except UnicodeEncodeError:
-        return six.moves.urllib.parse.quote(url.encode('utf-8'), safe=safe)
+        return urllib.parse.quote(url.encode('utf-8'), safe=safe)
 
 
 def urlquoteplus(url, safe=b""):
     try:
-        return six.moves.urllib.parse.quote_plus(str(url), safe=safe)
+        return urllib.parse.quote_plus(str(url), safe=safe)
     except UnicodeEncodeError:
-        return six.moves.urllib.parse.quote_plus(url.encode('utf-8'), 
safe=safe)
+        return urllib.parse.quote_plus(url.encode('utf-8'), safe=safe)
 
 
 def urlquote_path_only(url):
@@ -1027,7 +1030,18 @@ class exceptionless:
         return inner
 
 
-def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=None):
+class NoRedirectToInternal(urllib.request.HTTPRedirectHandler):
+    def redirect_request(self, req: urllib.request.Request, fp, code, msg, 
headers, newurl: str):
+        if not asbool(tg.config.get('urlopen_allow_internal_hostnames', 
'false')):
+            validators.URLIsPrivate().to_python(newurl, None)
+        return super().redirect_request(req, fp, code, msg, headers, newurl)
+
+
+opener = urllib.request.build_opener(NoRedirectToInternal)
+urllib.request.install_opener(opener)
+
+
+def urlopen(url: str | urllib.request.Request, retries=3, codes=(408, 500, 
502, 503, 504), timeout=None):
     """Open url, optionally retrying if an error is encountered.
 
     Socket and other IO errors will always be retried if retries > 0.
@@ -1037,12 +1051,22 @@ def urlopen(url, retries=3, codes=(408, 500, 502, 503, 
504), timeout=None):
     :param codes: HTTP error codes that should be retried.
 
     """
+    if isinstance(url, urllib.request.Request):
+        url_str =  url.full_url
+    else:
+        url_str = url
+    if not url_str.startswith(('http://', 'https://')):
+        raise ValueError(f'URL must be http(s), got {url_str}')
+    if not asbool(tg.config.get('urlopen_allow_internal_hostnames', 'false')):
+        # will raise error if hostname resolves to private address space:
+        validators.URLIsPrivate().to_python(url_str, None)
+
     attempts = 0
     while True:
         try:
-            return six.moves.urllib.request.urlopen(url, timeout=timeout)
+            return urllib.request.urlopen(url, timeout=timeout)
         except OSError as e:
-            no_retry = isinstance(e, six.moves.urllib.error.HTTPError) and 
e.code not in codes
+            no_retry = isinstance(e, urllib.error.HTTPError) and e.code not in 
codes
             if attempts < retries and not no_retry:
                 attempts += 1
                 continue
diff --git a/Allura/allura/tests/test_helpers.py 
b/Allura/allura/tests/test_helpers.py
index 9db093a81..9a12062bd 100644
--- a/Allura/allura/tests/test_helpers.py
+++ b/Allura/allura/tests/test_helpers.py
@@ -454,13 +454,13 @@ back\\\-slash escaped
 
 class TestUrlOpen(TestCase):
 
-    @patch('six.moves.urllib.request.urlopen')
+    @patch('urllib.request.urlopen')
     def test_no_error(self, urlopen):
-        r = h.urlopen('myurl')
+        r = h.urlopen('http://example.com')
         self.assertEqual(r, urlopen.return_value)
-        urlopen.assert_called_once_with('myurl', timeout=None)
+        urlopen.assert_called_once_with('http://example.com', timeout=None)
 
-    @patch('six.moves.urllib.request.urlopen')
+    @patch('urllib.request.urlopen')
     def test_socket_timeout(self, urlopen):
         import socket
 
@@ -468,10 +468,10 @@ class TestUrlOpen(TestCase):
             raise socket.timeout()
 
         urlopen.side_effect = side_effect
-        self.assertRaises(socket.timeout, h.urlopen, 'myurl')
+        self.assertRaises(socket.timeout, h.urlopen, 'http://example.com')
         self.assertEqual(urlopen.call_count, 4)
 
-    @patch('six.moves.urllib.request.urlopen')
+    @patch('urllib.request.urlopen')
     def test_socket_reset(self, urlopen):
         import socket
         import errno
@@ -480,10 +480,10 @@ class TestUrlOpen(TestCase):
             raise OSError(errno.ECONNRESET, 'Connection reset by peer')
 
         urlopen.side_effect = side_effect
-        self.assertRaises(socket.error, h.urlopen, 'myurl')
+        self.assertRaises(socket.error, h.urlopen, 'http://example.com')
         self.assertEqual(urlopen.call_count, 4)
 
-    @patch('six.moves.urllib.request.urlopen')
+    @patch('urllib.request.urlopen')
     def test_handled_http_error(self, urlopen):
         from urllib.error import HTTPError
 
@@ -491,10 +491,10 @@ class TestUrlOpen(TestCase):
             raise HTTPError('url', 408, 'timeout', None, io.BytesIO())
 
         urlopen.side_effect = side_effect
-        self.assertRaises(HTTPError, h.urlopen, 'myurl')
+        self.assertRaises(HTTPError, h.urlopen, 'http://example.com')
         self.assertEqual(urlopen.call_count, 4)
 
-    @patch('six.moves.urllib.request.urlopen')
+    @patch('urllib.request.urlopen')
     def test_unhandled_http_error(self, urlopen):
         from urllib.error import HTTPError
 
@@ -502,7 +502,7 @@ class TestUrlOpen(TestCase):
             raise HTTPError('url', 404, 'timeout', None, io.BytesIO())
 
         urlopen.side_effect = side_effect
-        self.assertRaises(HTTPError, h.urlopen, 'myurl')
+        self.assertRaises(HTTPError, h.urlopen, 'http://example.com')
         self.assertEqual(urlopen.call_count, 1)
 
 
diff --git a/Allura/development.ini b/Allura/development.ini
index 7e3850d50..0f93fcc41 100644
--- a/Allura/development.ini
+++ b/Allura/development.ini
@@ -471,6 +471,9 @@ bulk_export_download_instructions = Sample instructions for 
{project}
 
 importer_upload_path = /tmp/importer_upload/{nbhd}/{project}
 
+; If you want to allow importing resources from internally resolving 
hostnames, enable this:
+;urlopen_allow_internal_hostnames = false
+
 ; To disable any plugin, tool, importer, etc from being available, you can use 
the disable_entry_points config option.
 ; Specify the keys and values as they are declared in the tool's "setup.py" 
file.
 ; Examples:
diff --git a/ForgeImporters/forgeimporters/base.py 
b/ForgeImporters/forgeimporters/base.py
index f21539c29..bd07200c5 100644
--- a/ForgeImporters/forgeimporters/base.py
+++ b/ForgeImporters/forgeimporters/base.py
@@ -20,15 +20,13 @@ import errno
 import logging
 from io import BytesIO
 
-import six.moves.urllib.request
-import six.moves.urllib.parse
-import six.moves.urllib.error
+import urllib.request
+import urllib.parse
 from collections import defaultdict
 import traceback
 from urllib.parse import urlparse
 from urllib.parse import unquote
 from datetime import datetime
-import six
 
 from bs4 import BeautifulSoup
 from tg import expose, validate, flash, redirect, config
@@ -159,17 +157,17 @@ class ProjectExtractor:
 
     PAGE_MAP = {}
 
-    def __init__(self, project_name, page_name=None, **kw):
+    def __init__(self, project_name, page_name_or_url=None, **kw):
         self.project_name = project_name
         self._page_cache = {}
         self.url = None
         self.page = None
-        if page_name:
-            self.get_page(page_name, **kw)
+        if page_name_or_url:
+            self.get_page(page_name_or_url, **kw)
 
     @staticmethod
     def urlopen(url, retries=3, codes=(408, 500, 502, 503, 504), timeout=120, 
unredirected_hdrs=None, **kw):
-        req = six.moves.urllib.request.Request(url, **kw)
+        req = urllib.request.Request(url, **kw)
         if unredirected_hdrs:
             for key, val in unredirected_hdrs.items():
                 req.add_unredirected_header(key, val)
@@ -211,7 +209,7 @@ class ProjectExtractor:
 
         """
         return self.PAGE_MAP[page_name].format(
-            project_name=six.moves.urllib.parse.quote(self.project_name), **kw)
+            project_name=urllib.parse.quote(self.project_name), **kw)
 
     def parse_page(self, page):
         """Transforms the result of a `urlopen` call before returning it from
diff --git a/ForgeImporters/forgeimporters/tests/test_base.py 
b/ForgeImporters/forgeimporters/tests/test_base.py
index fe3d18d36..07672023c 100644
--- a/ForgeImporters/forgeimporters/tests/test_base.py
+++ b/ForgeImporters/forgeimporters/tests/test_base.py
@@ -34,16 +34,44 @@ from forgeimporters import base
 class TestProjectExtractor(TestCase):
 
     @mock.patch('forgeimporters.base.h.urlopen')
-    @mock.patch('six.moves.urllib.request.Request')
+    @mock.patch('urllib.request.Request')
     def test_urlopen(self, Request, urlopen):
-        r = base.ProjectExtractor.urlopen('myurl', data='foo')
-        Request.assert_called_once_with('myurl', data='foo')
+        r = base.ProjectExtractor.urlopen('https://example.com/asdf', 
data='foo')
+        Request.assert_called_once_with('https://example.com/asdf', data='foo')
         req = Request.return_value
         req.add_header.assert_called_once_with(
             'User-Agent', 'Allura Data Importer (https://allura.apache.org/)')
         urlopen.assert_called_once_with(req, retries=3, codes=(408, 500, 502, 
503, 504), timeout=120)
         self.assertEqual(r, urlopen.return_value)
 
+    def test_urlopen_invalid(self):
+        with pytest.raises(ValueError):
+            base.ProjectExtractor.urlopen('myurl', data='foo')
+        with pytest.raises(ValueError):
+            base.ProjectExtractor.urlopen('file:///etc/passwd', data='foo')
+        with pytest.raises(ValueError):
+            base.ProjectExtractor.urlopen('ftp://something.com', data='foo')
+
+    def test_urlopen_internal_blocked(self):
+        # by default this is invalid
+        with pytest.raises(Invalid):
+            base.ProjectExtractor.urlopen('http://localhost:1234/blah', 
data='foo')
+
+        # redirect to external site ok
+        
base.ProjectExtractor.urlopen('https://httpbin.org/redirect-to?url=http%3A%2F%2Fexample.com%2F')
+
+        # redirect to internal is blocked
+        with pytest.raises(Invalid):
+            
base.ProjectExtractor.urlopen('https://httpbin.org/redirect-to?url=http%3A%2F%2Flocalhost%2F')
+
+    @mock.patch('forgeimporters.base.h.urlopen')
+    @mock.patch('urllib.request.Request')
+    def test_urlopen_internal_ok(self, Request, urlopen):
+        # does NOT raise Invalid
+        with mock.patch.dict(base.config, {'urlopen_allow_internal_hostnames': 
'true'}):
+            base.ProjectExtractor.urlopen('http://localhost:1234/blah', 
data='foo')
+        Request.assert_called_once_with('http://localhost:1234/blah', 
data='foo')
+
 
 @mock.patch.object(base, 'datetime')
 @mock.patch.object(base, 'M')

Reply via email to