This is an automated email from the ASF dual-hosted git repository.

eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 062fb3a438 Fix treatment of "#" in S3Hook.parse_s3_url() (#41796)
062fb3a438 is described below

commit 062fb3a4380d3e849f5eec8b9b21eb7065c597ce
Author: GlenboLake <[email protected]>
AuthorDate: Thu Aug 29 17:31:43 2024 -0400

    Fix treatment of "#" in S3Hook.parse_s3_url() (#41796)
    
    The current implementation of parse_s3_url will truncate a key if it contains
    an octothorpe ("#") character. By passing the allow_fragments=False argument to
    urlsplit, keys will be correctly parsed.
---
 airflow/providers/amazon/aws/hooks/s3.py    |  2 +-
 tests/providers/amazon/aws/hooks/test_s3.py | 74 +++++++++++++++++++++--------
 2 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/airflow/providers/amazon/aws/hooks/s3.py 
b/airflow/providers/amazon/aws/hooks/s3.py
index 5f2c136640..76aed19782 100644
--- a/airflow/providers/amazon/aws/hooks/s3.py
+++ b/airflow/providers/amazon/aws/hooks/s3.py
@@ -238,7 +238,7 @@ class S3Hook(AwsBaseHook):
         valid_s3_virtual_hosted_format = "https://bucket-name.s3.region-code.amazonaws.com/key-name"
         format = s3url.split("//")
         if re.match(r"s3[na]?:", format[0], re.IGNORECASE):
-            parsed_url = urlsplit(s3url)
+            parsed_url = urlsplit(s3url, allow_fragments=False)
             if not parsed_url.netloc:
                 raise S3HookUriParseFailure(
                     "Please provide a bucket name using a valid format of the 
form: "
diff --git a/tests/providers/amazon/aws/hooks/test_s3.py 
b/tests/providers/amazon/aws/hooks/test_s3.py
index 9dade82004..97696c64b6 100644
--- a/tests/providers/amazon/aws/hooks/test_s3.py
+++ b/tests/providers/amazon/aws/hooks/test_s3.py
@@ -81,25 +81,61 @@ class TestAwsS3Hook:
         with pytest.raises(TypeError, match="transfer_config_args expected 
dict, got .*"):
             S3Hook(transfer_config_args=transfer_config_args)
 
-    def test_parse_s3_url(self):
-        parsed = S3Hook.parse_s3_url("s3://test/this/is/not/a-real-key.txt")
-        assert parsed == ("test", "this/is/not/a-real-key.txt"), "Incorrect 
parsing of the s3 url"
-
-    def test_parse_s3_url_s3a_style(self):
-        parsed = S3Hook.parse_s3_url("s3a://test/this/is/not/a-real-key.txt")
-        assert parsed == ("test", "this/is/not/a-real-key.txt"), "Incorrect 
parsing of the s3 url"
-
-    def test_parse_s3_url_s3n_style(self):
-        parsed = S3Hook.parse_s3_url("s3n://test/this/is/not/a-real-key.txt")
-        assert parsed == ("test", "this/is/not/a-real-key.txt"), "Incorrect 
parsing of the s3 url"
-
-    def test_parse_s3_url_path_style(self):
-        parsed = S3Hook.parse_s3_url("https://s3.us-west-2.amazonaws.com/DOC-EXAMPLE-BUCKET1/test.jpg")
-        assert parsed == ("DOC-EXAMPLE-BUCKET1", "test.jpg"), "Incorrect 
parsing of the s3 url"
-
-    def test_parse_s3_url_virtual_hosted_style(self):
-        parsed = S3Hook.parse_s3_url("https://DOC-EXAMPLE-BUCKET1.s3.us-west-2.amazonaws.com/test.png")
-        assert parsed == ("DOC-EXAMPLE-BUCKET1", "test.png"), "Incorrect 
parsing of the s3 url"
+    @pytest.mark.parametrize(
+        "url, expected",
+        [
+            pytest.param(
+                "s3://test/this/is/not/a-real-key.txt", ("test", 
"this/is/not/a-real-key.txt"), id="s3 style"
+            ),
+            pytest.param(
+                "s3a://test/this/is/not/a-real-key.txt",
+                ("test", "this/is/not/a-real-key.txt"),
+                id="s3a style",
+            ),
+            pytest.param(
+                "s3n://test/this/is/not/a-real-key.txt",
+                ("test", "this/is/not/a-real-key.txt"),
+                id="s3n style",
+            ),
+            pytest.param(
+                "https://s3.us-west-2.amazonaws.com/DOC-EXAMPLE-BUCKET1/test.jpg",
+                ("DOC-EXAMPLE-BUCKET1", "test.jpg"),
+                id="path style",
+            ),
+            pytest.param(
+                "https://DOC-EXAMPLE-BUCKET1.s3.us-west-2.amazonaws.com/test.png",
+                ("DOC-EXAMPLE-BUCKET1", "test.png"),
+                id="virtual hosted style",
+            ),
+            pytest.param(
+                "s3://test/this/is/not/a-real-key #2.txt",
+                ("test", "this/is/not/a-real-key #2.txt"),
+                id="s3 style with #",
+            ),
+            pytest.param(
+                "s3a://test/this/is/not/a-real-key #2.txt",
+                ("test", "this/is/not/a-real-key #2.txt"),
+                id="s3a style with #",
+            ),
+            pytest.param(
+                "s3n://test/this/is/not/a-real-key #2.txt",
+                ("test", "this/is/not/a-real-key #2.txt"),
+                id="s3n style with #",
+            ),
+            pytest.param(
+                "https://s3.us-west-2.amazonaws.com/DOC-EXAMPLE-BUCKET1/test 
#2.jpg",
+                ("DOC-EXAMPLE-BUCKET1", "test #2.jpg"),
+                id="path style with #",
+            ),
+            pytest.param(
+                "https://DOC-EXAMPLE-BUCKET1.s3.us-west-2.amazonaws.com/test 
#2.png",
+                ("DOC-EXAMPLE-BUCKET1", "test #2.png"),
+                id="virtual hosted style with #",
+            ),
+        ],
+    )
+    def test_parse_s3_url(self, url: str, expected: tuple[str, str]):
+        assert S3Hook.parse_s3_url(url) == expected, "Incorrect parsing of the 
s3 url"
 
     def test_parse_invalid_s3_url_virtual_hosted_style(self):
         with pytest.raises(

Reply via email to