This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/main by this push:
     new 2a3d16c  Scan files using puremagic on upload
2a3d16c is described below

commit 2a3d16cc0639e944597461bb1303348900a0f946
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Dec 16 15:19:06 2025 +0000

    Scan files using puremagic on upload
---
 atr/detection.py                | 96 +++++++++++++++++++++++++++++++++++++++++
 atr/docs/code-conventions.md    |  2 +-
 atr/get/sbom.py                 |  2 +-
 atr/sbom/models/conformance.py  |  2 +-
 atr/storage/writers/revision.py |  7 +++
 pyproject.toml                  |  1 +
 tests/e2e/compose/test_get.py   |  2 +-
 uv.lock                         | 11 +++++
 8 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/atr/detection.py b/atr/detection.py
new file mode 100644
index 0000000..0e52ce4
--- /dev/null
+++ b/atr/detection.py
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+from typing import Final
+
+import puremagic
+
+_BZIP2_TYPES: Final[set[str]] = {"application/x-bzip2"}
+_DEB_TYPES: Final[set[str]] = {"application/vnd.debian.binary-package", "application/x-archive"}
+_EXE_TYPES: Final[set[str]] = {"application/vnd.microsoft.portable-executable", "application/octet-stream"}
+_GZIP_TYPES: Final[set[str]] = {"application/x-gzip", "application/x-tgz"}
+_PDF_TYPES: Final[set[str]] = {"application/pdf"}
+_RPM_TYPES: Final[set[str]] = {"application/x-rpm"}
+_TAR_TYPES: Final[set[str]] = {"application/x-tar"}
+_XZ_TYPES: Final[set[str]] = {"application/x-xz"}
+_ZIP_TYPES: Final[set[str]] = {"application/zip", "application/java-archive"}
+
+_EXPECTED: Final[dict[str, set[str]]] = {
+    ".apk": _ZIP_TYPES,
+    ".bin.zip": _ZIP_TYPES,
+    ".deb": _DEB_TYPES,
+    ".exe": _EXE_TYPES,
+    ".jar": _ZIP_TYPES,
+    ".nar": _ZIP_TYPES,
+    ".nbm": _ZIP_TYPES,
+    ".pack.gz": _GZIP_TYPES,
+    ".pdf": _PDF_TYPES,
+    ".rpm": _RPM_TYPES,
+    ".src.tgz": _GZIP_TYPES,
+    ".src.zip": _ZIP_TYPES,
+    ".tar": _TAR_TYPES,
+    ".tar.bz2": _BZIP2_TYPES,
+    ".tar.gz": _GZIP_TYPES,
+    ".tar.xz": _XZ_TYPES,
+    ".tgz": _GZIP_TYPES,
+    ".vsix": _ZIP_TYPES,
+    ".war": _ZIP_TYPES,
+    ".whl": _ZIP_TYPES,
+    ".zip": _ZIP_TYPES,
+}
+
+_COMPOUND_SUFFIXES: Final = tuple(s for s in _EXPECTED if s.count(".") > 1)
+
+
+def validate_directory(directory: pathlib.Path) -> list[str]:
+    # TODO: Report errors using the whole relative path, not just the filename
+    errors: list[str] = []
+    for path in directory.rglob("*"):
+        if path.is_symlink():
+            errors.append(f"{path.name}: Symbolic links are not allowed")
+            continue
+        if path.is_file():
+            if error := _validate_file(path):
+                errors.append(error)
+    return errors
+
+
+def _suffix(filename: str) -> str:
+    name = filename.lower()
+    for compound in _COMPOUND_SUFFIXES:
+        if name.endswith(compound):
+            return compound
+    return pathlib.Path(name).suffix
+
+
+def _validate_file(path: pathlib.Path) -> str | None:
+    # TODO: Report errors using the whole relative path, not just the filename
+    suffix = _suffix(path.name)
+    if suffix not in _EXPECTED:
+        return None
+    if path.stat().st_size == 0:
+        return f"{path.name}: Empty file"
+    try:
+        results = puremagic.magic_file(path)
+    except puremagic.PureError:
+        return f"{path.name}: Unidentified file format (expected {suffix})"
+    detected_types = {r.mime_type for r in results}
+    if not (detected_types & _EXPECTED[suffix]):
+        primary = results[0].mime_type if results else "unknown"
+        return f"{path.name}: Content mismatch (expected {suffix}, detected {primary})"
+    return None
diff --git a/atr/docs/code-conventions.md b/atr/docs/code-conventions.md
index 0d54cce..04e20ea 100644
--- a/atr/docs/code-conventions.md
+++ b/atr/docs/code-conventions.md
@@ -194,7 +194,7 @@ Maintain modules with a reasonable number of interfaces. Though no strict limits
 
 ### Sort functions alphabetically
 
-Wherever possible, the order of functions within each module should be alphabetical by name. Take advantage of this convention by grouping related functions under a common prefix (including grouping helper functions with their caller), and using numbers in the names of functions called in serial order.
+Wherever possible, the order of functions within each module should be alphabetical by name. Take advantage of this convention by grouping related functions under a common prefix (including grouping helper functions with their caller), and using numbers in the names of functions called in serial order. As an exception to this rule, underscored (and hence private) functions are sorted below public functions.
 
 ### Keep cyclomatic complexity below 10
 
diff --git a/atr/get/sbom.py b/atr/get/sbom.py
index dc88b7e..fb570a7 100644
--- a/atr/get/sbom.py
+++ b/atr/get/sbom.py
@@ -162,7 +162,7 @@ def _missing_table(block: htm.Block, items: list[sbom.models.conformance.Missing
         htm.tr[
             htm.td[
                 kind.upper()
-                if len(components) == 0
+                if (len(components) == 0)
                 else htm.details[htm.summary[kind.upper()], htm.div[_detail_table(components)]]
             ],
             htm.td[prop],
diff --git a/atr/sbom/models/conformance.py b/atr/sbom/models/conformance.py
index bdce64a..7b02adb 100644
--- a/atr/sbom/models/conformance.py
+++ b/atr/sbom/models/conformance.py
@@ -61,7 +61,7 @@ class MissingComponentProperty(Strict):
     index: int | None = None
 
     def __str__(self) -> str:
-        if self.index is None or self.component is None:
+        if (self.index is None) or (self.component is None):
             comp = "primary component"
         else:
             comp = self.component
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index 904625f..24e681a 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -31,6 +31,7 @@ import aioshutil
 
 import atr.db as db
 import atr.db.interaction as interaction
+import atr.detection as detection
 import atr.models.sql as sql
 import atr.storage as storage
 import atr.storage.types as types
@@ -142,6 +143,12 @@ class CommitteeParticipant(FoundationCommitter):
             await aioshutil.rmtree(temp_dir)
             raise
 
+        validation_errors = await asyncio.to_thread(detection.validate_directory, temp_dir_path)
+        if validation_errors:
+            await aioshutil.rmtree(temp_dir)
+            creating.failed = types.FailedError("File validation failed:\n" + "\n".join(validation_errors))
+            return
+
         # Ensure that the permissions of every directory are 755
         try:
             await asyncio.to_thread(util.chmod_directories, temp_dir_path)
diff --git a/pyproject.toml b/pyproject.toml
index 8c4c540..686190b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
   "ldap3 (==2.10.2rc2)",
   "packaging>=25.0",
   "pgpy>=0.6.0",
+  "puremagic>=1.30",
   "pydantic-xml (>=2.17.2,<3.0.0)",
   "pyjwt (>=2.10.1,<3.0.0)",
   "pynacl>=1.5.0",
diff --git a/tests/e2e/compose/test_get.py b/tests/e2e/compose/test_get.py
index fecda2b..760b022 100644
--- a/tests/e2e/compose/test_get.py
+++ b/tests/e2e/compose/test_get.py
@@ -97,7 +97,7 @@ def test_start_vote_button_enabled_when_tasks_complete(page_compose: Page) -> No
 def test_start_vote_button_has_href(page_compose: Page) -> None:
     """The start vote button should have an href attribute set."""
     vote_button = page_compose.locator("#start-vote-button")
-    expect(vote_button).to_have_attribute("href", re.compile(r"/voting/test/0\.1\+compose/\d+"))
+    expect(vote_button).to_have_attribute("href", re.compile(r"/voting/test/0\.1\+e2e-compose/\d+"))
 
 
 def test_start_vote_button_has_title(page_compose: Page) -> None:
diff --git a/uv.lock b/uv.lock
index fd39706..8b8c044 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1123,6 +1123,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
 ]
 
+[[package]]
+name = "puremagic"
+version = "1.30"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dd/7f/9998706bc516bdd664ccf929a1da6c6e5ee06e48f723ce45aae7cf3ff36e/puremagic-1.30.tar.gz", hash = "sha256:f9ff7ac157d54e9cf3bff1addfd97233548e75e685282d84ae11e7ffee1614c9", size = 314785, upload-time = "2025-07-04T18:48:36.061Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/91/ed/1e347d85d05b37a8b9a039ca832e5747e1e5248d0bd66042783ef48b4a37/puremagic-1.30-py3-none-any.whl", hash = "sha256:5eeeb2dd86f335b9cfe8e205346612197af3500c6872dffebf26929f56e9d3c1", size = 43304, upload-time = "2025-07-04T18:48:34.801Z" },
+]
+
 [[package]]
 name = "py-serializable"
 version = "2.1.0"
@@ -1780,6 +1789,7 @@ dependencies = [
     { name = "ldap3" },
     { name = "packaging" },
     { name = "pgpy" },
+    { name = "puremagic" },
     { name = "pydantic-xml" },
     { name = "pyjwt" },
     { name = "pynacl" },
@@ -1835,6 +1845,7 @@ requires-dist = [
     { name = "ldap3", specifier = "==2.10.2rc2" },
     { name = "packaging", specifier = ">=25.0" },
     { name = "pgpy", specifier = ">=0.6.0" },
+    { name = "puremagic", specifier = ">=1.30" },
     { name = "pydantic-xml", specifier = ">=2.17.2,<3.0.0" },
     { name = "pyjwt", specifier = ">=2.10.1,<3.0.0" },
     { name = "pynacl", specifier = ">=1.5.0" },


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to