sbp commented on code in PR #579:
URL: 
https://github.com/apache/tooling-trusted-releases/pull/579#discussion_r2717434778


##########
atr/ssh.py:
##########
@@ -312,26 +318,71 @@ async def _step_04_command_validate(
             # Projects are public, so existence information is public
             raise RsyncArgsError(f"Project '{path_project}' does not exist")
 
-        release = await data.release(project_name=project.name, 
version=path_version).get()
+        release = await data.release(project_name=project.name, 
version=path_version, _release_policy=True).get()
 
     if is_read_request:
         #################################################
         ### Calls _step_06a_validate_read_permissions ###
         #################################################
-        validated_release = await _step_06a_validate_read_permissions(
-            ssh_uid, project, release, path_project, path_version
+        validated_release, file_patterns = await 
_step_06a_validate_read_permissions(
+            ssh_uid, project, release, path_project, path_version, tag
         )
-        return path_project, path_version, validated_release
+        return path_project, path_version, file_patterns, validated_release
 
     ##################################################
     ### Calls _step_06b_validate_write_permissions ###
     ##################################################
     await _step_06b_validate_write_permissions(ssh_uid, project, release)
-    # Return None for the release object for write requests
-    return path_project, path_version, None
+    # Return None for the tag and release objects for write requests
+    return path_project, path_version, None, None
 
 
-def _step_05_command_path_validate(path: str) -> tuple[str, str]:
+def _step_05a_command_path_validate_read(path: str) -> tuple[str, str, str | 
None]:
+    """Validate the path argument for rsync commands."""
+    # READ: rsync --server --sender -vlogDtpre.iLsfxCIvu . /proj/v1/
+    # Validating path: /proj/v1/
+    # WRITE: rsync --server -vlogDtpre.iLsfxCIvu . /proj/v1/
+    # Validating path: /proj/v1/
+
+    if not path.startswith("/"):
+        raise RsyncArgsError("The path argument should be an absolute path")
+
+    if not path.endswith("/"):
+        # Technically we could ignore this, because we rewrite the path anyway 
for writes
+        # But we should enforce good rsync usage practices
+        raise RsyncArgsError("The path argument should be a directory path, 
ending with a /")
+
+    if "//" in path:
+        raise RsyncArgsError("The path argument should not contain //")
+
+    if path.count("/") < 3 or path.count("/") > 4:
+        raise RsyncArgsError("The path argument should be a 
/PROJECT/VERSION/(tag)/ directory path")
+
+    path_project, path_version, *rest = path.strip("/").split("/", 2)
+    tag = rest[0] if rest else None
+    alphanum = set(string.ascii_letters + string.digits + "-")
+    if not all(c in alphanum for c in path_project):
+        raise RsyncArgsError("The project name should contain only 
alphanumeric characters or hyphens")
+
+    if tag and (not all(c in alphanum for c in path_project)):

Review Comment:
   Presumably should be `tag` rather than `path_project` at the end.



##########
atr/ssh.py:
##########
@@ -352,11 +403,15 @@ def _step_05_command_path_validate(path: str) -> 
tuple[str, str]:
     if path.count("/") != 3:
         raise RsyncArgsError("The path argument should be a /PROJECT/VERSION/ 
directory path")
 
-    path_project, path_version = path.strip("/").split("/", 1)
+    path_project, path_version, *rest = path.strip("/").split("/", 2)
+    tag = rest[0] if rest else None
     alphanum = set(string.ascii_letters + string.digits + "-")
     if not all(c in alphanum for c in path_project):
         raise RsyncArgsError("The project name should contain only 
alphanumeric characters or hyphens")
 
+    if tag and (not all(c in alphanum for c in path_project)):

Review Comment:
   Presumably should be `tag` rather than `path_project` at the end.



##########
atr/storage/writers/policy.py:
##########
@@ -95,13 +97,15 @@ async def edit_compose(self, form: 
shared.projects.ComposePolicyForm) -> None:
         project_name = form.project_name
         _, release_policy = await self.__get_or_create_policy(project_name)
 
+        atr_tags_dict = yaml.safe_load(form.atr_file_tagging_spec) or {}

Review Comment:
   As well as using `strictyaml`, perhaps we should catch errors here and 
display a nicer message. I expect that it's quite likely that we'll get 
malformed inputs.



##########
atr/ssh.py:
##########
@@ -352,11 +403,15 @@ def _step_05_command_path_validate(path: str) -> 
tuple[str, str]:
     if path.count("/") != 3:
         raise RsyncArgsError("The path argument should be a /PROJECT/VERSION/ 
directory path")
 
-    path_project, path_version = path.strip("/").split("/", 1)
+    path_project, path_version, *rest = path.strip("/").split("/", 2)

Review Comment:
   We should probably improve the `if path.count("/") != 3" validation directly 
above to ensure that this really is `/PROJECT/VERSION/` so that this line and 
the following line can be simpler and clearer.



##########
atr/storage/writers/policy.py:
##########
@@ -95,13 +97,15 @@ async def edit_compose(self, form: 
shared.projects.ComposePolicyForm) -> None:
         project_name = form.project_name
         _, release_policy = await self.__get_or_create_policy(project_name)
 
+        atr_tags_dict = yaml.safe_load(form.atr_file_tagging_spec) or {}

Review Comment:
   Also, we don't validate `atr_tags_dict`. Since we're allowing arbitrary 
keys, a Pydantic class probably isn't going to work for this, but we should add 
a strict `dict[str, list[str]]` type guard.



##########
migrations/versions/0041_2026.01.21_44cdc6b9.py:
##########
@@ -0,0 +1,27 @@
+"""Add ATR tagging spec policy
+
+Revision ID: 0041_2026.01.21_44cdc6b9
+Revises: 0040_2026.01.15_31d91cc5
+Create Date: 2026-01-21 15:52:13.681523+00:00
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+
+# Revision identifiers, used by Alembic
+revision: str = "0041_2026.01.21_44cdc6b9"
+down_revision: str | None = "0040_2026.01.15_31d91cc5"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    with op.batch_alter_table("releasepolicy", schema=None) as batch_op:
+        batch_op.add_column(sa.Column("atr_file_tagging_spec", sa.JSON(), 
nullable=False, server_default="{}"))

Review Comment:
   How about `file_tag_mappings`? (I've applied this migration too, so you 
won't be the only one downgrading!)
   



##########
atr/ssh.py:
##########
@@ -441,10 +506,14 @@ async def _step_07a_process_validated_rsync_read(
         if not await aiofiles.os.path.isdir(source_dir):
             raise RsyncArgsError(f"Source directory '{source_dir}' not found 
for release {release.name}")
 
-        # Update the rsync command path to the determined source directory
-        argv[-1] = str(source_dir)
-        if not argv[-1].endswith("/"):
-            argv[-1] += "/"
+        if file_patterns is None:
+            # Update the rsync command path to the determined source directory
+            argv[-1] = str(source_dir)
+            if not argv[-1].endswith("/"):
+                argv[-1] += "/"
+        else:
+            files = [f for pattern in file_patterns for f in 
glob.glob(f"{source_dir}/{pattern}")]

Review Comment:
   Synchronous call (`glob.glob`) in an asynchronous function.



##########
atr/ssh.py:
##########
@@ -441,10 +506,14 @@ async def _step_07a_process_validated_rsync_read(
         if not await aiofiles.os.path.isdir(source_dir):
             raise RsyncArgsError(f"Source directory '{source_dir}' not found 
for release {release.name}")
 
-        # Update the rsync command path to the determined source directory
-        argv[-1] = str(source_dir)
-        if not argv[-1].endswith("/"):
-            argv[-1] += "/"
+        if file_patterns is None:
+            # Update the rsync command path to the determined source directory
+            argv[-1] = str(source_dir)
+            if not argv[-1].endswith("/"):
+                argv[-1] += "/"
+        else:
+            files = [f for pattern in file_patterns for f in 
glob.glob(f"{source_dir}/{pattern}")]

Review Comment:
   The `pattern` is provided by the user, so if it contained `../..` I think 
this would be a path traversal.



##########
pyproject.toml:
##########
@@ -42,6 +42,7 @@ dependencies = [
   # "pynacl>=1.5.0",
   "python-decouple~=3.8",
   "python-gnupg~=0.5",
+  "pyyaml>=6.0.3",

Review Comment:
   Can we use `strictyaml` instead?



##########
atr/storage/writers/policy.py:
##########
@@ -95,13 +97,15 @@ async def edit_compose(self, form: 
shared.projects.ComposePolicyForm) -> None:
         project_name = form.project_name
         _, release_policy = await self.__get_or_create_policy(project_name)
 
+        atr_tags_dict = yaml.safe_load(form.atr_file_tagging_spec) or {}

Review Comment:
   Let's also check for path traversal here, as early as possible.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to