Copilot commented on code in PR #4702:
URL: https://github.com/apache/texera/pull/4702#discussion_r3177357171


##########
amber/src/main/python/pytexera/storage/test_dataset_file_document.py:
##########
@@ -0,0 +1,184 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import io
+
+import pytest
+from unittest.mock import patch, MagicMock
+
+from pytexera.storage.dataset_file_document import DatasetFileDocument
+
+
+DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download";
+CUSTOM_ENDPOINT = "https://example.test/api/presign";
+
+
[email protected]
+def auth_env(monkeypatch):
+    """Provide a JWT and pinned presign endpoint for the duration of one 
test."""
+    monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+    monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+
+
+def make_response(status_code: int, body=None, content: bytes = b""):
+    response = MagicMock()
+    response.status_code = status_code
+    response.json.return_value = body or {}
+    response.text = "" if body is None else str(body)
+    response.content = content
+    return response
+
+
+class TestDatasetFileDocumentInit:
+    def test_parses_minimal_four_part_path(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.owner_email == "[email protected]"
+        assert doc.dataset_name == "ds"
+        assert doc.version_name == "v1"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_joins_nested_relative_path_back_with_slashes(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/a/b/c/file.csv")
+        assert doc.file_relative_path == "a/b/c/file.csv"
+
+    def test_strips_leading_and_trailing_slashes_before_parsing(self, 
auth_env):
+        doc = DatasetFileDocument("///[email protected]/ds/v1/file.csv///")
+        assert doc.owner_email == "[email protected]"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_rejects_path_with_fewer_than_four_segments(self, auth_env):
+        with pytest.raises(ValueError, match="Invalid file path format"):
+            DatasetFileDocument("/[email protected]/ds/v1")
+
+    def test_requires_jwt_token_in_environment(self, monkeypatch):
+        monkeypatch.delenv("USER_JWT_TOKEN", raising=False)
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_treats_empty_jwt_as_missing(self, monkeypatch):
+        # An empty string is falsy and should be rejected just like an unset 
var.
+        monkeypatch.setenv("USER_JWT_TOKEN", "")
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_falls_back_to_default_endpoint_when_env_missing(self, 
monkeypatch):
+        monkeypatch.setenv("USER_JWT_TOKEN", "tok")
+        monkeypatch.delenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
raising=False)
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == DEFAULT_ENDPOINT
+
+    def test_uses_explicit_endpoint_from_environment(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == CUSTOM_ENDPOINT
+
+
+class TestGetPresignedUrl:
+    def _make_doc(self, monkeypatch, path="/[email protected]/ds/v1/file.csv"):
+        monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        return DatasetFileDocument(path)
+
+    def test_returns_presigned_url_field_from_json_body(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(
+                200, body={"presignedUrl": "https://signed.test/x"}
+            )
+            assert doc.get_presigned_url() == "https://signed.test/x";
+
+    def test_sends_bearer_authorization_header_with_jwt(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            _, kwargs = mock_get.call_args
+            assert kwargs["headers"] == {"Authorization": "Bearer 
test-jwt-token"}
+
+    def test_url_encodes_filepath_query_parameter(self, monkeypatch):
+        # urllib.parse.quote keeps "/" as safe by default, but encodes "@"
+        # and " " — pin both pieces so the contract is explicit.
+        doc = self._make_doc(monkeypatch, path="/[email protected]/ds/v1/data 
file.csv")
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            _, kwargs = mock_get.call_args
+            file_path = kwargs["params"]["filePath"]
+            assert "data%20file.csv" in file_path
+            assert "bob%40x.com" in file_path
+            assert file_path.startswith("/")
+
+    def test_calls_configured_endpoint(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            args, _ = mock_get.call_args
+            assert args[0] == CUSTOM_ENDPOINT
+
+    def test_raises_runtime_error_with_status_and_body_on_failure(self, 
monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(403, body="forbidden")
+            with pytest.raises(RuntimeError, match="403"):
+                doc.get_presigned_url()
+
+
+class TestReadFile:
+    def _make_doc(self, monkeypatch):
+        monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        return DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_returns_bytesio_with_downloaded_content(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.side_effect = [
+                make_response(200, body={"presignedUrl": 
"https://signed.test/x"}),
+                make_response(200, content=b"hello-bytes"),
+            ]
+            buf = doc.read_file()
+            assert isinstance(buf, io.BytesIO)
+            assert buf.read() == b"hello-bytes"
+
+    def test_propagates_presigned_url_failure(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(500, body="upstream down")
+            with pytest.raises(RuntimeError, match="500"):
+                doc.read_file()
+
+    def test_raises_runtime_error_when_download_fails(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.side_effect = [
+                make_response(200, body={"presignedUrl": 
"https://signed.test/x"}),
+                make_response(404, body="missing"),
+            ]
+            with pytest.raises(RuntimeError, match="404"):
+                doc.read_file()

Review Comment:
   `test_raises_runtime_error_when_download_fails` asserts only the status code 
("404"). Since `read_file()` includes `response.text` in its error message, it 
would be stronger to also assert the body text (e.g., "missing") is present (or 
rename the test if you only intend to check status).
   



##########
amber/src/main/python/pytexera/storage/test_dataset_file_document.py:
##########
@@ -0,0 +1,184 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import io
+
+import pytest
+from unittest.mock import patch, MagicMock
+
+from pytexera.storage.dataset_file_document import DatasetFileDocument
+
+
+DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download";
+CUSTOM_ENDPOINT = "https://example.test/api/presign";
+
+
[email protected]
+def auth_env(monkeypatch):
+    """Provide a JWT and pinned presign endpoint for the duration of one 
test."""
+    monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+    monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+
+
+def make_response(status_code: int, body=None, content: bytes = b""):
+    response = MagicMock()
+    response.status_code = status_code
+    response.json.return_value = body or {}
+    response.text = "" if body is None else str(body)
+    response.content = content
+    return response
+
+
+class TestDatasetFileDocumentInit:
+    def test_parses_minimal_four_part_path(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.owner_email == "[email protected]"
+        assert doc.dataset_name == "ds"
+        assert doc.version_name == "v1"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_joins_nested_relative_path_back_with_slashes(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/a/b/c/file.csv")
+        assert doc.file_relative_path == "a/b/c/file.csv"
+
+    def test_strips_leading_and_trailing_slashes_before_parsing(self, 
auth_env):
+        doc = DatasetFileDocument("///[email protected]/ds/v1/file.csv///")
+        assert doc.owner_email == "[email protected]"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_rejects_path_with_fewer_than_four_segments(self, auth_env):
+        with pytest.raises(ValueError, match="Invalid file path format"):
+            DatasetFileDocument("/[email protected]/ds/v1")
+
+    def test_requires_jwt_token_in_environment(self, monkeypatch):
+        monkeypatch.delenv("USER_JWT_TOKEN", raising=False)
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_treats_empty_jwt_as_missing(self, monkeypatch):
+        # An empty string is falsy and should be rejected just like an unset 
var.
+        monkeypatch.setenv("USER_JWT_TOKEN", "")
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_falls_back_to_default_endpoint_when_env_missing(self, 
monkeypatch):
+        monkeypatch.setenv("USER_JWT_TOKEN", "tok")
+        monkeypatch.delenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
raising=False)
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == DEFAULT_ENDPOINT
+
+    def test_uses_explicit_endpoint_from_environment(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == CUSTOM_ENDPOINT
+
+
+class TestGetPresignedUrl:
+    def _make_doc(self, monkeypatch, path="/[email protected]/ds/v1/file.csv"):
+        monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        return DatasetFileDocument(path)
+
+    def test_returns_presigned_url_field_from_json_body(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(
+                200, body={"presignedUrl": "https://signed.test/x"}
+            )
+            assert doc.get_presigned_url() == "https://signed.test/x";
+

Review Comment:
   The tests don't cover the case where `get_presigned_url()` receives a 200 
response whose JSON body omits `presignedUrl` (current implementation returns 
`None`, and `read_file()` will then call `requests.get(None)`). Adding a test 
for this scenario (either to document current behavior or to pin an expected 
exception once fixed) would better cover the failure paths described in the 
linked issue/PR description.



##########
amber/src/main/python/pytexera/storage/test_dataset_file_document.py:
##########
@@ -0,0 +1,184 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import io
+
+import pytest
+from unittest.mock import patch, MagicMock
+
+from pytexera.storage.dataset_file_document import DatasetFileDocument
+
+
+DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download";
+CUSTOM_ENDPOINT = "https://example.test/api/presign";
+
+
[email protected]
+def auth_env(monkeypatch):
+    """Provide a JWT and pinned presign endpoint for the duration of one 
test."""
+    monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+    monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+
+
+def make_response(status_code: int, body=None, content: bytes = b""):
+    response = MagicMock()
+    response.status_code = status_code
+    response.json.return_value = body or {}
+    response.text = "" if body is None else str(body)
+    response.content = content
+    return response
+
+
+class TestDatasetFileDocumentInit:
+    def test_parses_minimal_four_part_path(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.owner_email == "[email protected]"
+        assert doc.dataset_name == "ds"
+        assert doc.version_name == "v1"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_joins_nested_relative_path_back_with_slashes(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/a/b/c/file.csv")
+        assert doc.file_relative_path == "a/b/c/file.csv"
+
+    def test_strips_leading_and_trailing_slashes_before_parsing(self, 
auth_env):
+        doc = DatasetFileDocument("///[email protected]/ds/v1/file.csv///")
+        assert doc.owner_email == "[email protected]"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_rejects_path_with_fewer_than_four_segments(self, auth_env):
+        with pytest.raises(ValueError, match="Invalid file path format"):
+            DatasetFileDocument("/[email protected]/ds/v1")
+
+    def test_requires_jwt_token_in_environment(self, monkeypatch):
+        monkeypatch.delenv("USER_JWT_TOKEN", raising=False)
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_treats_empty_jwt_as_missing(self, monkeypatch):
+        # An empty string is falsy and should be rejected just like an unset 
var.
+        monkeypatch.setenv("USER_JWT_TOKEN", "")
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_falls_back_to_default_endpoint_when_env_missing(self, 
monkeypatch):
+        monkeypatch.setenv("USER_JWT_TOKEN", "tok")
+        monkeypatch.delenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
raising=False)
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == DEFAULT_ENDPOINT
+
+    def test_uses_explicit_endpoint_from_environment(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == CUSTOM_ENDPOINT
+
+
+class TestGetPresignedUrl:
+    def _make_doc(self, monkeypatch, path="/[email protected]/ds/v1/file.csv"):
+        monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        return DatasetFileDocument(path)
+
+    def test_returns_presigned_url_field_from_json_body(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(
+                200, body={"presignedUrl": "https://signed.test/x"}
+            )
+            assert doc.get_presigned_url() == "https://signed.test/x";
+
+    def test_sends_bearer_authorization_header_with_jwt(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            _, kwargs = mock_get.call_args
+            assert kwargs["headers"] == {"Authorization": "Bearer 
test-jwt-token"}
+
+    def test_url_encodes_filepath_query_parameter(self, monkeypatch):
+        # urllib.parse.quote keeps "/" as safe by default, but encodes "@"
+        # and " " — pin both pieces so the contract is explicit.
+        doc = self._make_doc(monkeypatch, path="/[email protected]/ds/v1/data 
file.csv")
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            _, kwargs = mock_get.call_args
+            file_path = kwargs["params"]["filePath"]
+            assert "data%20file.csv" in file_path
+            assert "bob%40x.com" in file_path
+            assert file_path.startswith("/")
+
+    def test_calls_configured_endpoint(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            args, _ = mock_get.call_args
+            assert args[0] == CUSTOM_ENDPOINT
+
+    def test_raises_runtime_error_with_status_and_body_on_failure(self, 
monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(403, body="forbidden")
+            with pytest.raises(RuntimeError, match="403"):
+                doc.get_presigned_url()
+
+
+class TestReadFile:
+    def _make_doc(self, monkeypatch):
+        monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        return DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_returns_bytesio_with_downloaded_content(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.side_effect = [
+                make_response(200, body={"presignedUrl": 
"https://signed.test/x"}),
+                make_response(200, content=b"hello-bytes"),
+            ]
+            buf = doc.read_file()
+            assert isinstance(buf, io.BytesIO)
+            assert buf.read() == b"hello-bytes"
+
+    def test_propagates_presigned_url_failure(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(500, body="upstream down")
+            with pytest.raises(RuntimeError, match="500"):

Review Comment:
   `test_propagates_presigned_url_failure` only matches the status code ("500") 
even though `DatasetFileDocument.get_presigned_url()` includes the response 
body in the error text. Consider also asserting that the raised message 
contains the body (e.g., "upstream down") so the test actually validates the 
more-informative error detail.
   



##########
amber/src/main/python/pytexera/storage/test_dataset_file_document.py:
##########
@@ -0,0 +1,184 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import io
+
+import pytest
+from unittest.mock import patch, MagicMock
+
+from pytexera.storage.dataset_file_document import DatasetFileDocument
+
+
+DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download";
+CUSTOM_ENDPOINT = "https://example.test/api/presign";
+
+
[email protected]
+def auth_env(monkeypatch):
+    """Provide a JWT and pinned presign endpoint for the duration of one 
test."""
+    monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+    monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+
+
+def make_response(status_code: int, body=None, content: bytes = b""):
+    response = MagicMock()
+    response.status_code = status_code
+    response.json.return_value = body or {}
+    response.text = "" if body is None else str(body)
+    response.content = content
+    return response
+
+
+class TestDatasetFileDocumentInit:
+    def test_parses_minimal_four_part_path(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.owner_email == "[email protected]"
+        assert doc.dataset_name == "ds"
+        assert doc.version_name == "v1"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_joins_nested_relative_path_back_with_slashes(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/a/b/c/file.csv")
+        assert doc.file_relative_path == "a/b/c/file.csv"
+
+    def test_strips_leading_and_trailing_slashes_before_parsing(self, 
auth_env):
+        doc = DatasetFileDocument("///[email protected]/ds/v1/file.csv///")
+        assert doc.owner_email == "[email protected]"
+        assert doc.file_relative_path == "file.csv"
+
+    def test_rejects_path_with_fewer_than_four_segments(self, auth_env):
+        with pytest.raises(ValueError, match="Invalid file path format"):
+            DatasetFileDocument("/[email protected]/ds/v1")
+
+    def test_requires_jwt_token_in_environment(self, monkeypatch):
+        monkeypatch.delenv("USER_JWT_TOKEN", raising=False)
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_treats_empty_jwt_as_missing(self, monkeypatch):
+        # An empty string is falsy and should be rejected just like an unset 
var.
+        monkeypatch.setenv("USER_JWT_TOKEN", "")
+        with pytest.raises(ValueError, match="JWT token is required"):
+            DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+
+    def test_falls_back_to_default_endpoint_when_env_missing(self, 
monkeypatch):
+        monkeypatch.setenv("USER_JWT_TOKEN", "tok")
+        monkeypatch.delenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
raising=False)
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == DEFAULT_ENDPOINT
+
+    def test_uses_explicit_endpoint_from_environment(self, auth_env):
+        doc = DatasetFileDocument("/[email protected]/ds/v1/file.csv")
+        assert doc.presign_endpoint == CUSTOM_ENDPOINT
+
+
+class TestGetPresignedUrl:
+    def _make_doc(self, monkeypatch, path="/[email protected]/ds/v1/file.csv"):
+        monkeypatch.setenv("USER_JWT_TOKEN", "test-jwt-token")
+        monkeypatch.setenv("FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT", 
CUSTOM_ENDPOINT)
+        return DatasetFileDocument(path)
+
+    def test_returns_presigned_url_field_from_json_body(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(
+                200, body={"presignedUrl": "https://signed.test/x"}
+            )
+            assert doc.get_presigned_url() == "https://signed.test/x";
+
+    def test_sends_bearer_authorization_header_with_jwt(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            _, kwargs = mock_get.call_args
+            assert kwargs["headers"] == {"Authorization": "Bearer 
test-jwt-token"}
+
+    def test_url_encodes_filepath_query_parameter(self, monkeypatch):
+        # urllib.parse.quote keeps "/" as safe by default, but encodes "@"
+        # and " " — pin both pieces so the contract is explicit.
+        doc = self._make_doc(monkeypatch, path="/[email protected]/ds/v1/data 
file.csv")
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            _, kwargs = mock_get.call_args
+            file_path = kwargs["params"]["filePath"]
+            assert "data%20file.csv" in file_path
+            assert "bob%40x.com" in file_path
+            assert file_path.startswith("/")
+
+    def test_calls_configured_endpoint(self, monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(200, body={"presignedUrl": 
"u"})
+            doc.get_presigned_url()
+            args, _ = mock_get.call_args
+            assert args[0] == CUSTOM_ENDPOINT
+
+    def test_raises_runtime_error_with_status_and_body_on_failure(self, 
monkeypatch):
+        doc = self._make_doc(monkeypatch)
+        with patch("pytexera.storage.dataset_file_document.requests.get") as 
mock_get:
+            mock_get.return_value = make_response(403, body="forbidden")
+            with pytest.raises(RuntimeError, match="403"):

Review Comment:
   `test_raises_runtime_error_with_status_and_body_on_failure` name/doc implies 
the RuntimeError message includes both status and body, but the assertion only 
matches the status code. Either assert that the exception message also contains 
the response body (e.g., "forbidden") or rename the test to reflect what it 
actually verifies.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to