This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 7233089  Detect and allow package roots from npm pack output
7233089 is described below

commit 72330898d2cf1b173df94b6e9f466bf812e2f77e
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Jan 30 17:18:39 2026 +0000

    Detect and allow package roots from npm pack output
---
 atr/tasks/checks/targz.py                |  63 +++++++++++---
 atr/tasks/checks/zipformat.py            |  42 ++++++++-
 atr/util.py                              |  59 +++++++++++++
 tests/unit/test_archive_root_variants.py | 144 +++++++++++++++++++++++++++++++
 4 files changed, 292 insertions(+), 16 deletions(-)

diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index 4eae0fb..f3583b1 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -51,9 +51,10 @@ async def integrity(args: checks.FunctionArguments) -> 
results.Results | None:
     return None
 
 
-def root_directory(tgz_path: str) -> str:
-    """Find the root directory in a tar archive and validate that it has only 
one root dir."""
+def root_directory(tgz_path: str) -> tuple[str, bytes | None]:  # noqa: C901
+    """Find root directory and extract package/package.json if found."""
     root = None
+    package_json: bytes | None = None
 
     with tarzip.open_archive(tgz_path) as archive:
         for member in archive:
@@ -63,18 +64,30 @@ def root_directory(tgz_path: str) -> str:
 
             parts = member.name.split("/", 1)
             if len(parts) >= 1:
-                if not root:
+                if root is None:
                     root = parts[0]
                 elif parts[0] != root:
                     raise RootDirectoryError(f"Multiple root directories 
found: {root}, {parts[0]}")
 
+            if (root == "package") and (package_json is None):
+                member_name = member.name.lstrip("./")
+                if (member_name == "package/package.json") and member.isfile():
+                    size = member.size if hasattr(member, "size") else 0
+                    if (size > 0) and (size <= util.NPM_PACKAGE_JSON_MAX_SIZE):
+                        f = archive.extractfile(member)
+                        if f is not None:
+                            try:
+                                package_json = f.read()
+                            finally:
+                                f.close()
+
     if not root:
         raise RootDirectoryError("No root directory found in archive")
 
-    return root
+    return root, package_json
 
 
-async def structure(args: checks.FunctionArguments) -> results.Results | None:
+async def structure(args: checks.FunctionArguments) -> results.Results | None: 
 # noqa: C901
     """Check the structure of a .tar.gz file."""
     recorder = await args.recorder()
     if not (artifact_abs_path := await recorder.abs_path()):
@@ -94,16 +107,42 @@ async def structure(args: checks.FunctionArguments) -> 
results.Results | None:
     )
 
     try:
-        root = await asyncio.to_thread(root_directory, str(artifact_abs_path))
+        root, package_json = await asyncio.to_thread(root_directory, 
str(artifact_abs_path))
+        data: dict[str, object] = {
+            "root": root,
+            "basename_from_filename": basename_from_filename,
+            "expected_roots": expected_roots,
+        }
         if root in expected_roots:
-            await recorder.success(
-                "Archive contains exactly one root directory matching an 
expected name",
-                {"root": root, "basename_from_filename": 
basename_from_filename, "expected_roots": expected_roots},
-            )
+            await recorder.success("Archive contains exactly one root 
directory matching an expected name", data)
+        elif root == "package":
+            if package_json is not None:
+                npm_info, npm_error = util.parse_npm_pack_info(package_json, 
basename_from_filename)
+                if npm_info is not None:
+                    data["npm_pack"] = {
+                        "name": npm_info.name,
+                        "version": npm_info.version,
+                        "filename_match": npm_info.filename_match,
+                    }
+                    if npm_info.filename_match is False:
+                        await recorder.warning(
+                            "npm pack layout detected but filename does not 
match package.json", data
+                        )
+                    else:
+                        await recorder.success("npm pack layout detected, 
allowing package/ root", data)
+                else:
+                    if npm_error is not None:
+                        data["npm_pack_error"] = npm_error
+                    await recorder.warning(
+                        f"Root directory '{root}' does not match expected 
names '{expected_roots_display}'", data
+                    )
+            else:
+                await recorder.warning(
+                    f"Root directory '{root}' does not match expected names 
'{expected_roots_display}'", data
+                )
         else:
             await recorder.warning(
-                f"Root directory '{root}' does not match expected names 
'{expected_roots_display}'",
-                {"root": root, "basename_from_filename": 
basename_from_filename, "expected_roots": expected_roots},
+                f"Root directory '{root}' does not match expected names 
'{expected_roots_display}'", data
             )
     except tarzip.ArchiveMemberLimitExceededError as e:
         await recorder.failure(f"Archive has too many members: {e}", {"error": 
str(e)})
diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py
index 504de2f..6cfb0b5 100644
--- a/atr/tasks/checks/zipformat.py
+++ b/atr/tasks/checks/zipformat.py
@@ -92,11 +92,27 @@ def _integrity_check_core_logic(artifact_path: str) -> 
dict[str, Any]:
         return {"error": f"Unexpected error: {e}"}
 
 
-def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]:
+def _structure_check_core_logic(artifact_path: str) -> dict[str, Any]:  # 
noqa: C901
     """Verify the internal structure of the zip archive."""
     try:
         with tarzip.open_archive(artifact_path) as archive:
-            members: list[tarzip.Member] = list(archive)
+            members: list[tarzip.Member] = []
+            package_json: bytes | None = None
+
+            for member in archive:
+                members.append(member)
+                if package_json is None:
+                    member_name = member.name.lstrip("./")
+                    if (member_name == "package/package.json") and 
member.isfile():
+                        size = member.size if hasattr(member, "size") else 0
+                        if (size > 0) and (size <= 
util.NPM_PACKAGE_JSON_MAX_SIZE):
+                            f = archive.extractfile(member)
+                            if f is not None:
+                                try:
+                                    package_json = f.read()
+                                finally:
+                                    f.close()
+
             if not members:
                 return {"error": "Archive is empty"}
 
@@ -110,7 +126,22 @@ def _structure_check_core_logic(artifact_path: str) -> 
dict[str, Any]:
                 member_names, root_dirs, non_rooted_files, expected_roots
             )
 
-            if error_msg:
+            if error_msg is not None:
+                if (actual_root == "package") and (package_json is not None):
+                    npm_info, _ = util.parse_npm_pack_info(package_json, 
basename_from_filename)
+                    if npm_info is not None:
+                        npm_data: dict[str, Any] = {
+                            "root_dir": actual_root,
+                            "expected_roots": expected_roots,
+                            "npm_pack": {
+                                "name": npm_info.name,
+                                "version": npm_info.version,
+                                "filename_match": npm_info.filename_match,
+                            },
+                        }
+                        if npm_info.filename_match is False:
+                            npm_data["warning"] = "npm pack layout detected 
but filename does not match package.json"
+                        return npm_data
                 result_data: dict[str, Any] = {"expected_roots": 
expected_roots}
                 if error_msg.startswith("Root directory mismatch"):
                     result_data["warning"] = error_msg
@@ -157,7 +188,10 @@ def _structure_check_core_logic_validate_root(
     actual_root = next(iter(root_dirs))
     if actual_root not in expected_roots:
         expected_roots_display = "', '".join(expected_roots)
-        return None, f"Root directory mismatch. Expected one of 
'{expected_roots_display}', found '{actual_root}'"
+        return (
+            actual_root,
+            f"Root directory mismatch. Expected one of 
'{expected_roots_display}', found '{actual_root}'",
+        )
 
     # Check whether all members are under the correct root directory
     expected_prefix = f"{actual_root.rstrip('/')}/"
diff --git a/atr/util.py b/atr/util.py
index 4b60e1a..b2d67f1 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -70,9 +70,17 @@ DEV_THREAD_URLS: Final[dict[str, str]] = {
     "CADL1oArKFcXvNb1MJfjN=10-yrfkxgpltrurdmm1r7ygatk...@mail.gmail.com": 
"https://lists.apache.org/thread/d7119h2qm7jrd5zsbp8ghkk0lpvnnxnw",
     "[email protected]": 
"https://lists.apache.org/thread/gzjd2jv7yod5sk5rgdf4x33g5l3fdf5o",
 }
+NPM_PACKAGE_JSON_MAX_SIZE: Final[int] = 512 * 1024
 USER_TESTS_ADDRESS: Final[str] = "[email protected]"
 
 
[email protected](frozen=True)
+class NpmPackInfo:
+    name: str
+    version: str
+    filename_match: bool | None
+
+
 class SshFingerprintError(ValueError):
     pass
 
@@ -701,6 +709,20 @@ def parse_key_blocks_bytes(keys_data: bytes) -> list[str]:
     return key_blocks
 
 
+def parse_npm_pack_info(raw: bytes, filename_basename: str | None = None) -> 
tuple[NpmPackInfo | None, str | None]:
+    """Parse npm pack info from package.json content."""
+    parsed, error = _npm_pack_parse_package_json(raw)
+    if (error is not None) or (parsed is None):
+        return None, error
+
+    name, version, error = _npm_pack_extract_name_version(parsed)
+    if (error is not None) or (name is None) or (version is None):
+        return None, error
+
+    filename_match = _npm_pack_filename_match(filename_basename, name, version)
+    return NpmPackInfo(name=name, version=version, 
filename_match=filename_match), None
+
+
 async def paths_recursive(base_path: pathlib.Path) -> 
AsyncGenerator[pathlib.Path]:
     """Yield all file paths recursively within a base path, relative to the 
base path."""
     if (resolved_base_path := await is_dir_resolve(base_path)) is None:
@@ -1159,6 +1181,43 @@ def _generate_hexdump(data: bytes) -> str:
     return "\n".join(hex_lines)
 
 
+def _npm_pack_extract_name_version(parsed: dict[str, Any]) -> tuple[str | 
None, str | None, str | None]:
+    name = parsed.get("name")
+    version = parsed.get("version")
+
+    if (not isinstance(name, str)) or (not name.strip()):
+        return None, None, "package/package.json missing or invalid 'name'"
+    if (not isinstance(version, str)) or (not version.strip()):
+        return None, None, "package/package.json missing or invalid 'version'"
+
+    return name.strip(), version.strip(), None
+
+
+def _npm_pack_filename_match(filename_basename: str | None, name: str, 
version: str) -> bool | None:
+    if not filename_basename:
+        return None
+    if "/" in name:
+        return None
+    return filename_basename == f"{name}-{version}"
+
+
+def _npm_pack_parse_package_json(raw: bytes) -> tuple[dict[str, Any] | None, 
str | None]:
+    try:
+        payload = raw.decode("utf-8")
+    except UnicodeDecodeError:
+        return None, "package/package.json is not valid UTF-8"
+
+    try:
+        parsed = json.loads(payload)
+    except json.JSONDecodeError as exc:
+        return None, f"package/package.json is not valid JSON: {exc}"
+
+    if not isinstance(parsed, dict):
+        return None, "package/package.json is not a JSON object"
+
+    return parsed, None
+
+
 def _thread_messages_walk(node: dict[str, Any] | None, message_ids: set[str]) 
-> None:
     if not isinstance(node, dict):
         return
diff --git a/tests/unit/test_archive_root_variants.py 
b/tests/unit/test_archive_root_variants.py
index 5a7f496..36ec86b 100644
--- a/tests/unit/test_archive_root_variants.py
+++ b/tests/unit/test_archive_root_variants.py
@@ -16,6 +16,7 @@
 # under the License.
 
 import io
+import json
 import pathlib
 import tarfile
 import zipfile
@@ -29,6 +30,33 @@ import atr.tasks.checks.zipformat as zipformat
 import tests.unit.recorders as recorders
 
 
[email protected]
+async def test_targz_structure_accepts_npm_pack_root(tmp_path: pathlib.Path) 
-> None:
+    archive_path = tmp_path / "example-1.2.3.tgz"
+    _make_tar_gz_with_contents(
+        archive_path,
+        {
+            "package/package.json": json.dumps({"name": "example", "version": 
"1.2.3"}),
+            "package/README.txt": "hello",
+        },
+    )
+    recorder = recorders.RecorderStub(archive_path, 
"tests.unit.test_archive_root_variants")
+    args = checks.FunctionArguments(
+        recorder=recorders.get_recorder(recorder),
+        asf_uid="",
+        project_name="test",
+        version_name="test",
+        revision_number="00001",
+        primary_rel_path=None,
+        extra_args={},
+    )
+
+    await targz.structure(args)
+
+    assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _ 
in recorder.messages)
+    assert not any(status == sql.CheckResultStatus.FAILURE.value for status, 
_, _ in recorder.messages)
+
+
 @pytest.mark.asyncio
 async def test_targz_structure_accepts_source_suffix_variant(tmp_path: 
pathlib.Path) -> None:
     archive_path = tmp_path / "apache-example-1.2.3-source.tar.gz"
@@ -69,6 +97,31 @@ async def 
test_targz_structure_accepts_src_suffix_variant(tmp_path: pathlib.Path
     assert any(status == sql.CheckResultStatus.SUCCESS.value for status, _, _ 
in recorder.messages)
 
 
[email protected]
+async def 
test_targz_structure_rejects_package_root_without_package_json(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "example-1.2.3.tgz"
+    _make_tar_gz_with_contents(
+        archive_path,
+        {
+            "package/README.txt": "hello",
+        },
+    )
+    recorder = recorders.RecorderStub(archive_path, 
"tests.unit.test_archive_root_variants")
+    args = checks.FunctionArguments(
+        recorder=recorders.get_recorder(recorder),
+        asf_uid="",
+        project_name="test",
+        version_name="test",
+        revision_number="00001",
+        primary_rel_path=None,
+        extra_args={},
+    )
+
+    await targz.structure(args)
+
+    assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ 
in recorder.messages)
+
+
 @pytest.mark.asyncio
 async def 
test_targz_structure_rejects_source_root_when_filename_has_no_suffix(tmp_path: 
pathlib.Path) -> None:
     archive_path = tmp_path / "apache-example-1.2.3.tar.gz"
@@ -149,6 +202,50 @@ async def 
test_targz_structure_rejects_src_root_when_filename_has_source_suffix(
     assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ 
in recorder.messages)
 
 
[email protected]
+async def test_targz_structure_warns_on_npm_pack_filename_mismatch(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "example-1.2.3.tgz"
+    _make_tar_gz_with_contents(
+        archive_path,
+        {
+            "package/package.json": json.dumps({"name": "different", 
"version": "1.2.3"}),
+            "package/README.txt": "hello",
+        },
+    )
+    recorder = recorders.RecorderStub(archive_path, 
"tests.unit.test_archive_root_variants")
+    args = checks.FunctionArguments(
+        recorder=recorders.get_recorder(recorder),
+        asf_uid="",
+        project_name="test",
+        version_name="test",
+        revision_number="00001",
+        primary_rel_path=None,
+        extra_args={},
+    )
+
+    await targz.structure(args)
+
+    assert any(status == sql.CheckResultStatus.WARNING.value for status, _, _ 
in recorder.messages)
+    assert any("npm pack layout detected" in message for _, message, _ in 
recorder.messages)
+
+
+def test_zipformat_structure_accepts_npm_pack_root(tmp_path: pathlib.Path) -> 
None:
+    archive_path = tmp_path / "example-1.2.3.zip"
+    _make_zip_with_contents(
+        archive_path,
+        {
+            "package/package.json": json.dumps({"name": "example", "version": 
"1.2.3"}),
+            "package/README.txt": "hello",
+        },
+    )
+
+    result = zipformat._structure_check_core_logic(str(archive_path))
+
+    assert result.get("error") is None
+    assert result.get("warning") is None
+    assert result.get("root_dir") == "package"
+
+
 def test_zipformat_structure_accepts_src_suffix_variant(tmp_path: 
pathlib.Path) -> None:
     archive_path = tmp_path / "apache-example-1.2.3-src.zip"
     _make_zip(archive_path, ["apache-example-1.2.3/README.txt"])
@@ -170,6 +267,38 @@ def 
test_zipformat_structure_rejects_dated_src_suffix(tmp_path: pathlib.Path) ->
     assert "Root directory mismatch" in result["warning"]
 
 
+def 
test_zipformat_structure_rejects_package_root_without_package_json(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "example-1.2.3.zip"
+    _make_zip_with_contents(
+        archive_path,
+        {
+            "package/README.txt": "hello",
+        },
+    )
+
+    result = zipformat._structure_check_core_logic(str(archive_path))
+
+    assert result.get("warning") is not None
+    assert "Root directory mismatch" in result["warning"]
+
+
+def test_zipformat_structure_warns_on_npm_pack_filename_mismatch(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "example-1.2.3.zip"
+    _make_zip_with_contents(
+        archive_path,
+        {
+            "package/package.json": json.dumps({"name": "different", 
"version": "1.2.3"}),
+            "package/README.txt": "hello",
+        },
+    )
+
+    result = zipformat._structure_check_core_logic(str(archive_path))
+
+    assert result.get("warning") is not None
+    assert "npm pack layout detected" in result["warning"]
+    assert result.get("root_dir") == "package"
+
+
 def _make_tar_gz(path: pathlib.Path, members: list[str]) -> None:
     with tarfile.open(path, "w:gz") as tf:
         for name in members:
@@ -179,7 +308,22 @@ def _make_tar_gz(path: pathlib.Path, members: list[str]) 
-> None:
             tf.addfile(info, io.BytesIO(data))
 
 
+def _make_tar_gz_with_contents(path: pathlib.Path, members: dict[str, str]) -> 
None:
+    with tarfile.open(path, "w:gz") as tf:
+        for name, content in members.items():
+            data = content.encode()
+            info = tarfile.TarInfo(name=name)
+            info.size = len(data)
+            tf.addfile(info, io.BytesIO(data))
+
+
 def _make_zip(path: pathlib.Path, members: list[str]) -> None:
     with zipfile.ZipFile(path, "w") as zf:
         for name in members:
             zf.writestr(name, f"data-{name}")
+
+
+def _make_zip_with_contents(path: pathlib.Path, members: dict[str, str]) -> 
None:
+    with zipfile.ZipFile(path, "w") as zf:
+        for name, content in members.items():
+            zf.writestr(name, content)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to