This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-release.git


The following commit(s) were added to refs/heads/main by this push:
     new 4db5e40  Add a basic SBOM ASF license checker
4db5e40 is described below

commit 4db5e4010d9f931863a2d476faa87ba97bfb24d3
Author: Sean B. Palmer <[email protected]>
AuthorDate: Mon Oct 6 19:49:36 2025 +0100

    Add a basic SBOM ASF license checker
---
 atr/sbomtool.py | 338 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 uv.lock         |   2 +-
 2 files changed, 338 insertions(+), 2 deletions(-)

diff --git a/atr/sbomtool.py b/atr/sbomtool.py
index aaa5f9c..56ab237 100644
--- a/atr/sbomtool.py
+++ b/atr/sbomtool.py
@@ -22,6 +22,7 @@ import dataclasses
 import datetime
 import enum
 import pathlib
+import re
 import subprocess
 import sys
 import tempfile
@@ -40,7 +41,112 @@ if TYPE_CHECKING:
     from collections.abc import Iterable
 
 # TODO: Simple cache to avoid rate limiting, not thread safe
+
 CACHE_PATH = pathlib.Path("/tmp/sbomtool-cache.json")
+
+CATEGORY_A_LICENSES: Final[frozenset[str]] = frozenset(
+    {
+        "0BSD",
+        "AFL-3.0",
+        "Apache-1.1",
+        "Apache-2.0",
+        "APAFML",
+        "Bitstream-Vera",
+        "BlueOak-1.0.0",
+        "BSD-2-Clause",
+        "BSD-3-Clause-LBNL",
+        "BSD-3-Clause",
+        "BSL-1.0",
+        "CC-PDDC",
+        "CC0-1.0",
+        "DOC",
+        "EDL-1.0",
+        "EPICS",
+        "FSFAP",
+        "HPND",
+        "ICU",
+        "ISC",
+        "Libpng-2.0",
+        "MIT-0",
+        "MIT",
+        "MS-PL",
+        "MulanPSL-2.0",
+        "NCSA",
+        "OGL-UK-3.0",
+        "PHP-3.01",
+        "PostgreSQL",
+        "Python-2.0",
+        "SMLNJ",
+        "TCL",
+        "Unicode-DFS-2016",
+        "Unlicense",
+        "UPL-1.0",
+        "W3C",
+        "WTFPL",
+        "Xnet",
+        "Zlib",
+        "ZPL-2.0",
+    }
+)
+
+CATEGORY_B_LICENSES: Final[frozenset[str]] = frozenset(
+    {
+        "CC-BY-2.5",
+        "CC-BY-3.0",
+        "CC-BY-4.0",
+        "CC-BY-SA-2.5",
+        "CC-BY-SA-3.0",
+        "CC-BY-SA-4.0",
+        "CDDL-1.0",
+        "CDDL-1.1",
+        "CPL-1.0",
+        "EPL-1.0",
+        "EPL-2.0",
+        "ErlPL-1.1",
+        "IPA",
+        "IPL-1.0",
+        "MPL-1.0",
+        "MPL-1.1",
+        "MPL-2.0",
+        "OFL-1.1",
+        "OSL-3.0",
+        "Ruby",
+        "SPL-1.0",
+        "Ubuntu-1.0",
+        "UnRAR",
+    }
+)
+
+CATEGORY_X_LICENSES: Final[frozenset[str]] = frozenset(
+    {
+        # "Apache-1.0",
+        "AGPL-3.0-only",
+        "BSD-4-Clause-UC",
+        "BSD-4-Clause",
+        "BUSL-1.1",
+        "CC-BY-NC-4.0",
+        "CC-BY-NC-ND-4.0",
+        "CC-BY-NC-SA-4.0",
+        "CPOL-1.02",
+        "GPL-1.0-only",
+        "GPL-2.0-only",
+        "GPL-3.0-only",
+        "JSON",
+        "LGPL-2.0-only",
+        "LGPL-2.1-only",
+        "LGPL-3.0-only",
+        "NPL-1.0",
+        "NPL-1.1",
+        "QPL-1.0",
+        "Sleepycat",
+        "SSPL-1.0",
+    }
+)
+
+_CATEGORY_A_LICENSES_FOLD: Final[frozenset[str]] = frozenset(value.casefold() for value in CATEGORY_A_LICENSES)
+_CATEGORY_B_LICENSES_FOLD: Final[frozenset[str]] = frozenset(value.casefold() for value in CATEGORY_B_LICENSES)
+_CATEGORY_X_LICENSES_FOLD: Final[frozenset[str]] = frozenset(value.casefold() for value in CATEGORY_X_LICENSES)
+
 KNOWN_PURL_PREFIXES: Final[dict[str, tuple[str, str]]] = {
     "pkg:maven/com.atlassian.": ("Atlassian", "https://www.atlassian.com/"),
     "pkg:maven/concurrent/concurrent@": (
@@ -49,12 +155,14 @@ KNOWN_PURL_PREFIXES: Final[dict[str, tuple[str, str]]] = {
     ),
     "pkg:maven/net.shibboleth.": ("The Shibboleth Consortium", "https://www.shibboleth.net/"),
 }
+
 KNOWN_PURL_SUPPLIERS: Final[dict[tuple[str, str], tuple[str, str]]] = {
     ("pkg:maven", "jakarta-regexp"): ("The Apache Software Foundation", "https://apache.org/"),
     ("pkg:maven", "javax.servlet.jsp"): ("Sun Microsystems", "https://sun.com/"),
     ("pkg:maven", "org.opensaml"): ("The Shibboleth Consortium", "https://www.shibboleth.net/"),
     ("pkg:maven", "org.osgi"): ("OSGi Working Group, The Eclipse Foundation", "https://www.osgi.org/"),
 }
+
 # TODO: Manually updated for now
 # Use GITHUB_TOKEN=... uv run python3 scripts/github_tag_dates.py CycloneDX/cyclonedx-maven-plugin
 MAVEN_PLUGIN_VERSIONS: Final[dict[str, str]] = {
@@ -111,7 +219,27 @@ MAVEN_PLUGIN_VERSIONS: Final[dict[str, str]] = {
     "2018-05-24T23:24:10Z": "1.0.1",
     "2018-05-02T16:34:05Z": "1.0.0",
 }
+
+
+SPDX_TOKEN: Final[re.Pattern[str]] = re.compile(
+    r"""
+        (?P<WS>\s+)
+      | (?P<LPAREN>\()
+      | (?P<RPAREN>\))
+      | (?P<AND>AND|and)
+      | (?P<OR>OR|or)
+      | (?P<WITH>WITH|with)
+      | (?P<PLUS>\+)
+      | (?P<DOCREF>DocumentRef-[A-Za-z0-9.-]+:LicenseRef-[A-Za-z0-9.-]+)
+      | (?P<LICREF>LicenseRef-[A-Za-z0-9.-]+)
+      | (?P<ADDREF>AdditionRef-[A-Za-z0-9.-]+)
+      | (?P<ID>[A-Za-z0-9.-]+)
+    """,
+    re.ASCII | re.VERBOSE,
+)
+
 THE_APACHE_SOFTWARE_FOUNDATION: Final[str] = "The Apache Software Foundation"
+
 VERSION: Final[str] = "0.0.1-dev1"
 
 # We include some sections from other files to make this standalone
@@ -186,6 +314,17 @@ class Supplier(Lax):
     name: str | None = None
 
 
+class License(Lax):
+    id: str | None = None
+    name: str | None = None
+    url: str | None = None
+
+
+class LicenseChoice(Lax):
+    license: License | None = None
+    expression: str | None = None
+
+
 class Component(Lax):
     bom_ref: str | None = pydantic.Field(default=None, alias="bom-ref")
     name: str | None = None
@@ -194,6 +333,8 @@ class Component(Lax):
     purl: str | None = None
     cpe: str | None = None
     swid: Swid | None = None
+    licenses: list[LicenseChoice] | None = None
+    scope: str | None = None
 
 
 class ToolComponent(Lax):
@@ -248,6 +389,12 @@ class ComponentProperty(enum.Enum):
     IDENTIFIER = enum.auto()
 
 
+class LicenseCategory(enum.Enum):
+    A = enum.auto()
+    B = enum.auto()
+    X = enum.auto()
+
+
 # Missing* is for NTIA 2021 conformance only
 
 
@@ -317,6 +464,81 @@ Outdated = Annotated[
 OutdatedAdapter = pydantic.TypeAdapter(Outdated)
 
 
+class LicenseIssue(Strict):
+    component_name: str
+    component_version: str | None
+    license_expression: str
+    category: LicenseCategory
+    any_unknown: bool = False
+    scope: str | None = None
+
+
+class SPDXLicenseExpressionParser:
+    def __init__(self, items: list[tuple[str, str]], text: str) -> None:
+        self.items = items
+        self.text = text
+        self.position = 0
+
+    def parse(self) -> set[str]:
+        atoms, _ = self.parse_expression()
+        if self.position != len(self.items):
+            raise ValueError(self.text)
+        return atoms
+
+    def parse_conjunction(self) -> tuple[set[str], bool]:
+        atoms, simple = self.parse_with()
+        while self.peek("AND"):
+            self.position += 1
+            atoms |= self.parse_with()[0]
+            simple = False
+        return atoms, simple
+
+    def parse_expression(self) -> tuple[set[str], bool]:
+        atoms, simple = self.parse_conjunction()
+        while self.peek("OR"):
+            self.position += 1
+            atoms |= self.parse_conjunction()[0]
+            simple = False
+        return atoms, simple
+
+    def parse_primary(self, for_addition: bool) -> tuple[set[str], bool]:
+        if self.position >= len(self.items):
+            raise ValueError(self.text)
+        kind, value = self.items[self.position]
+        if kind == "LPAREN":
+            self.position += 1
+            atoms, _ = self.parse_expression()
+            if not self.peek("RPAREN"):
+                raise ValueError(self.text)
+            self.position += 1
+            return atoms, False
+        if (not for_addition) and (kind in {"ID", "LICREF", "DOCREF"}):
+            self.position += 1
+            base = value
+            if self.peek("PLUS"):
+                self.position += 1
+            return {base}, True
+        if for_addition and (kind in {"ID", "LICREF", "DOCREF", "ADDREF"}):
+            self.position += 1
+            return set(), True
+        raise ValueError(self.text)
+
+    def parse_with(self) -> tuple[set[str], bool]:
+        atoms, simple = self.parse_primary(False)
+        while self.peek("WITH"):
+            if not simple:
+                raise ValueError(self.text)
+            self.position += 1
+            _, right_simple = self.parse_primary(True)
+            if not right_simple:
+                raise ValueError(self.text)
+            simple = False
+        return atoms, simple
+
+    def peek(self, kind: str) -> bool:
+        return (self.position < len(self.items)) and (self.items[self.position][0] == kind)
+
+
 @dataclasses.dataclass
 class Bundle:
     doc: yyjson.Document
@@ -531,6 +753,83 @@ def bundle_to_patch(bundle: Bundle) -> Patch:
     return patch_ops
 
 
+def check_licenses(bom: Bom) -> tuple[list[LicenseIssue], list[LicenseIssue]]:
+    warnings: list[LicenseIssue] = []
+    errors: list[LicenseIssue] = []
+
+    components = bom.components or []
+    if bom.metadata and bom.metadata.component:
+        components = [bom.metadata.component, *components]
+
+    for component in components:
+        name = component.name or "unknown"
+        version = component.version
+        scope = component.scope
+
+        if not component.licenses:
+            continue
+
+        for license_choice in component.licenses:
+            license_expr = None
+
+            if license_choice.expression:
+                license_expr = license_choice.expression
+            elif license_choice.license and license_choice.license.id:
+                license_expr = license_choice.license.id
+
+            if not license_expr:
+                continue
+
+            parse_failed = False
+            if license_choice.expression:
+                try:
+                    atoms = spdx_license_expression_atoms(license_expr)
+                except ValueError:
+                    parse_failed = True
+                    atoms = {license_expr}
+            else:
+                atoms = {license_expr}
+            got_warning = False
+            got_error = False
+            any_unknown = parse_failed
+            for atom in atoms:
+                folded = atom.casefold()
+                if folded in _CATEGORY_A_LICENSES_FOLD:
+                    continue
+                if folded in _CATEGORY_B_LICENSES_FOLD:
+                    got_warning = True
+                    continue
+                if folded in _CATEGORY_X_LICENSES_FOLD:
+                    got_error = True
+                    continue
+                got_error = True
+                any_unknown = True
+            if got_error:
+                errors.append(
+                    LicenseIssue(
+                        component_name=name,
+                        component_version=version,
+                        license_expression=license_expr,
+                        category=LicenseCategory.X,
+                        any_unknown=any_unknown,
+                        scope=scope,
+                    )
+                )
+            elif got_warning:
+                warnings.append(
+                    LicenseIssue(
+                        component_name=name,
+                        component_version=version,
+                        license_expression=license_expr,
+                        category=LicenseCategory.B,
+                        any_unknown=False,
+                        scope=scope,
+                    )
+                )
+
+    return warnings, errors
+
+
 def get_pointer(doc: yyjson.Document, path: str) -> Any | None:
     try:
         return doc.get_pointer(path)
@@ -609,13 +908,34 @@ def main() -> None:
                         print()
                     case MissingComponentProperty():
                         components = bundle.bom.components
-                        primary_component = bundle.bom.metadata and bundle.bom.metadata.component
+                        primary_component = bundle.bom.metadata.component if bundle.bom.metadata else None
                         if (error.index is not None) and (components is not None):
                             print(components[error.index].model_dump_json(indent=2))
                             print()
                         elif primary_component is not None:
                             print(primary_component.model_dump_json(indent=2))
                             print()
+        case "license":
+            warnings, errors = check_licenses(bundle.bom)
+            if warnings:
+                print("WARNINGS (Category B):")
+                for warning in warnings:
+                    version_str = f" {warning.component_version}" if warning.component_version else ""
+                    scope_str = f" [scope: {warning.scope}]" if warning.scope else ""
+                    print(f"  - {warning.component_name}{version_str}: {warning.license_expression}{scope_str}")
+                print()
+            if errors:
+                print("ERRORS (Category X):")
+                for error in errors:
+                    version_str = f" {error.component_version}" if error.component_version else ""
+                    scope_str = f" [scope: {error.scope}]" if error.scope else ""
+                    unknown_suffix = " (Category X due to unknown license identifiers)" if error.any_unknown else ""
+                    name_str = f"{error.component_name}{version_str}"
+                    license_str = f"{error.license_expression}{scope_str}{unknown_suffix}"
+                    print(f"  - {name_str}: {license_str}")
+                print()
+            if not warnings and not errors:
+                print("All licenses are approved (Category A)")
         case _:
             print(f"unknown command: {sys.argv[1]}")
             sys.exit(1)
@@ -877,6 +1197,22 @@ def sbomqs_total_score(value: pathlib.Path | str | yyjson.Document) -> float:
     return report.summary.total_score
 
 
+def spdx_license_expression_atoms(expr: str) -> set[str]:
+    pos = 0
+    tokens: list[tuple[str, str]] = []
+    for match in SPDX_TOKEN.finditer(expr):
+        if match.start() != pos:
+            raise ValueError(expr)
+        pos = match.end()
+        kind = match.lastgroup
+        if (kind) and (kind != "WS"):
+            tokens.append((kind, match.group(kind)))
+    if pos != len(expr):
+        raise ValueError(expr)
+
+    return SPDXLicenseExpressionParser(tokens, expr).parse()
+
+
 def validate_cyclonedx_cli(bundle: Bundle) -> list[str] | None:
     args = [
         "cyclonedx",
diff --git a/uv.lock b/uv.lock
index 54c4e7b..2f9fdc6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1666,7 +1666,7 @@ wheels = [
 ]
 
 [[package]]
-name = "tooling-trusted-release"
+name = "tooling-trusted-releases"
 version = "0.0.1"
 source = { virtual = "." }
 dependencies = [


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to