sbp commented on code in PR #825:
URL: 
https://github.com/apache/tooling-trusted-releases/pull/825#discussion_r2892266511


##########
atr/models/sql.py:
##########
@@ -264,6 +264,49 @@ class VoteEntry(schema.Strict):
 # Type decorators
 # TODO: Possibly move these to a separate module
 # That way, we can more easily track Alembic's dependence on them
+class SafeProjectName(sqlalchemy.types.TypeDecorator):

Review Comment:
   Are these classes actually used yet? I can't find any other reference to 
them. I thought they were being used when I reviewed this before, so possibly a 
refactoring thing?



##########
atr/admin/__init__.py:
##########
@@ -840,8 +847,8 @@ async def tasks_recent(session: web.Committer, minutes: 
int) -> str:
                     htpy.td[task.status.value],
                     htpy.td[task.added.strftime("%H:%M:%S") if task.added else 
""],
                     htpy.td[took_text],
-                    htpy.td[task.project_name or ""],
-                    htpy.td[task.version_name or ""],
+                    htpy.td[str(task.project_name) or ""],

Review Comment:
   ```
   >>> str(None)
   'None'
   ```
   
   Similar to the bug in `worker.py`. Basically anywhere that 
`task.project_name` is used we have to account for it being `None`. I guess in 
this case we could solve it by pushing `or ""` inside the `str(...)` and then 
adding this to the `safe` strings:
   
   ```
   def __bool__(self) -> bool:
       return True
   ```
   
   Because their values can't be empty, so they're always truthy. Note that 
because they're objects, they're already truthy, but I think that adding this 
method would be a good idea to make it explicit. We could even make it return 
the `bool(...)` of the value, just in case we decided to start allowing empty 
values for one of the safe strings.



##########
atr/models/safe.py:
##########


Review Comment:
   Refactoring bug: this code is now in `Alphanumeric` but still uses 
`ProjectName`.



##########
atr/worker.py:
##########
@@ -118,21 +119,25 @@ async def _execute_check_task(
             f"Task {task_id} ({task_type}) has non-dict raw args {task_args} 
which should represent keyword_args"
         )
 
+    project_name = safe.ProjectName(task_obj.project_name)
+    version_name = safe.VersionName(task_obj.version_name)
+
+    # TODO: The recorder needs values but what do we do if the task doesn't 
have a project name?
     async def recorder_factory() -> checks.Recorder:
         return await checks.Recorder.create(
             checker=handler,
             inputs_hash=task_obj.inputs_hash or "",
-            project_name=task_obj.project_name or "",
-            version_name=task_obj.version_name or "",
+            project_name=project_name or safe.ProjectName(""),

Review Comment:
   ```
   >>> import atr.models.safe
   >>> atr.models.safe.ProjectName("")
   Traceback (most recent call last):
     File "<python-input-1>", line 1, in <module>
       atr.models.safe.ProjectName("")
       ~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^
     File "/.../atr/models/safe.py", line 43, in __init__
       raise ValueError("Value cannot be empty")
   ValueError: Value cannot be empty
   ```
   
   Can we just allow this to be `None` inside the recorder perhaps? Will need 
to look more closely at that.



##########
atr/models/safe.py:
##########


Review Comment:
   Same kind of refactoring bug here. Should be `Alphanumeric` rather than 
`ProjectName`.



##########
atr/models/safe.py:
##########
@@ -15,13 +15,38 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import annotations
 
-class ProjectName:
-    """A project name that has been validated against the cache or database."""
+import string
+import unicodedata
+from typing import Any, Final
 
+_ALPHANUM: Final = frozenset(string.ascii_letters + string.digits + "-")
+_VERSION_CHARS: Final = _ALPHANUM | frozenset(".+")
+
+
+class Alphanumeric:
     __slots__ = ("_value",)
 
+    @classmethod
+    def _valid_chars(cls) -> frozenset[str]:
+        # default is the base set; subclasses can override this method
+        return _ALPHANUM
+
+    def _additional_validations(self, value):

Review Comment:
   Missing type signatures.



##########
atr/worker.py:
##########
@@ -118,21 +119,25 @@ async def _execute_check_task(
             f"Task {task_id} ({task_type}) has non-dict raw args {task_args} 
which should represent keyword_args"
         )
 
+    project_name = safe.ProjectName(task_obj.project_name)
+    version_name = safe.VersionName(task_obj.version_name)
+
+    # TODO: The recorder needs values but what do we do if the task doesn't 
have a project name?
     async def recorder_factory() -> checks.Recorder:
         return await checks.Recorder.create(
             checker=handler,
             inputs_hash=task_obj.inputs_hash or "",
-            project_name=task_obj.project_name or "",
-            version_name=task_obj.version_name or "",
+            project_name=project_name or safe.ProjectName(""),

Review Comment:
   Some tasks really do get `project_name=None`, so they'd currently break.



##########
atr/models/safe.py:
##########
@@ -15,13 +15,38 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import annotations
 
-class ProjectName:
-    """A project name that has been validated against the cache or database."""
+import string
+import unicodedata
+from typing import Any, Final
 
+_ALPHANUM: Final = frozenset(string.ascii_letters + string.digits + "-")
+_VERSION_CHARS: Final = _ALPHANUM | frozenset(".+")
+
+
+class Alphanumeric:
     __slots__ = ("_value",)
 
+    @classmethod
+    def _valid_chars(cls) -> frozenset[str]:
+        # default is the base set; subclasses can override this method
+        return _ALPHANUM
+
+    def _additional_validations(self, value):
+        pass
+
     def __init__(self, value: str) -> None:
+        _assert_standard_safe_syntax(value)
+
+        if not value:

Review Comment:
   Probably better to put this before the `_assert_standard_safe_syntax`?



##########
atr/models/safe.py:
##########
@@ -38,25 +63,50 @@ def __repr__(self) -> str:
     def __str__(self) -> str:
         return self._value
 
+    @classmethod
+    def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: Any) -> 
Any:
+        import pydantic_core.core_schema as core_schema
 
-class VersionName:
-    """A version name that has been validated against the cache or database."""
+        return core_schema.no_info_plain_validator_function(
+            lambda v: cls(v) if isinstance(v, str) else v,
+            serialization=core_schema.to_string_ser_schema(),
+        )
 
-    __slots__ = ("_value",)
 
-    def __init__(self, value: str) -> None:
-        self._value = value
+class ProjectName(Alphanumeric):
+    """A project name that has been validated for safety."""
 
-    def __eq__(self, other: object) -> bool:
-        if isinstance(other, VersionName):
-            return self._value == other._value
-        return NotImplemented
 
-    def __hash__(self) -> int:
-        return hash(self._value)
+class ReleaseName(Alphanumeric):
+    """A release name composed from a validated ProjectName and VersionName."""
 
-    def __repr__(self) -> str:
-        return f"VersionName({self._value!r})"
+    @classmethod
+    def _valid_chars(cls) -> frozenset[str]:
+        return _VERSION_CHARS
 
-    def __str__(self) -> str:
-        return self._value
+
+class VersionName(Alphanumeric):
+    """A version name that has been validated for safety"""
+
+    @classmethod
+    def _valid_chars(cls) -> frozenset[str]:
+        return _VERSION_CHARS
+
+    def _additional_validations(self, value):
+        if value[0] not in _ALPHANUM:
+            raise ValueError("A version should start with an alphanumeric 
character")
+        if value[-1] not in _ALPHANUM:
+            raise ValueError("A version should end with an alphanumeric 
character")
+
+
+def _assert_standard_safe_syntax(value: str) -> None:

Review Comment:
   What should we do about markup and JSON etc. metacharacters? I guess the 
intent is that we use this more widely outside of `Alphanumeric`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to