Re: [OE-core][PATCH v8 4/7] spdx30: Enrich source downloads with version and PURL

2026-03-11 Thread Joshua Watt via lists.openembedded.org
On Mon, Mar 9, 2026 at 7:29 AM  wrote:
>
> From: Stefano Tondo 
>
> Add version extraction, PURL generation, and external references
> to source download packages in SPDX 3.0 SBOMs:
>
> - Extract version from SRCREV for Git sources (full SHA-1)
> - Generate PURLs for Git sources on github.com by default
> - Support custom mappings via SPDX_GIT_PURL_MAPPINGS variable
>   (format: "domain:purl_type", split(':', 1) for parsing)
> - Use ecosystem PURLs from SPDX_PACKAGE_URLS for non-Git
> - Add VCS external references for Git downloads
> - Add distribution external references for tarball downloads
> - Parse Git URLs using urllib.parse
> - Extract logic into _generate_git_purl() and
>   _enrich_source_package() helpers
>
> The SPDX_GIT_PURL_MAPPINGS variable allows configuring PURL
> generation for self-hosted Git services (e.g., GitLab).
> github.com is always mapped to pkg:github by default.
>
> Signed-off-by: Stefano Tondo 
> ---
>  meta/classes/create-spdx-3.0.bbclass |   7 ++
>  meta/lib/oe/spdx30_tasks.py  | 122 +++
>  2 files changed, 129 insertions(+)
>
> diff --git a/meta/classes/create-spdx-3.0.bbclass b/meta/classes/create-spdx-3.0.bbclass
> index def2dacbc3..9e912b34e1 100644
> --- a/meta/classes/create-spdx-3.0.bbclass
> +++ b/meta/classes/create-spdx-3.0.bbclass
> @@ -152,6 +152,13 @@ SPDX_PACKAGE_URLS[doc] = "A space separated list of Package URLs (purls) for \
>  Override this variable to replace the default, otherwise append or prepend \
>  to add additional purls."
>
> +SPDX_GIT_PURL_MAPPINGS ??= ""
> +SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
> +mappings to configure PURL generation for Git source downloads. \
> +For example, "gitlab.example.com:pkg:gitlab" maps repositories hosted \
> +on gitlab.example.com to the pkg:gitlab PURL type. \
> +github.com is always mapped to pkg:github by default."
> +
>  IMAGE_CLASSES:append = " create-spdx-image-3.0"
>  SDK_CLASSES += "create-spdx-sdk-3.0"
>
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index c3a23d7889..1f6c84628d 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -13,6 +13,7 @@ import oe.spdx30
>  import oe.spdx_common
>  import oe.sdk
>  import os
> +import urllib.parse
>
>  from contextlib import contextmanager
>  from datetime import datetime, timezone
> @@ -377,6 +378,125 @@ def collect_dep_sources(dep_objsets, dest):
>  index_sources_by_hash(e.to, dest)
>
>
> +def _generate_git_purl(d, download_location, srcrev):
> +"""Generate a Package URL for a Git source from its download location.
> +
> +Parses the Git URL to identify the hosting service and generates the
> +appropriate PURL type. Supports github.com by default and custom
> +mappings via SPDX_GIT_PURL_MAPPINGS.
> +
> +Returns the PURL string or None if no mapping matches.
> +"""
> +if not download_location or not download_location.startswith('git+'):
> +return None
> +
> +git_url = download_location[4:]  # Remove 'git+' prefix
> +
> +# Default handler: github.com
> +git_purl_handlers = {
> +'github.com': 'pkg:github',
> +}
> +
> +# Custom PURL mappings from SPDX_GIT_PURL_MAPPINGS
> +# Format: "domain1:purl_type1 domain2:purl_type2"
> +custom_mappings = d.getVar('SPDX_GIT_PURL_MAPPINGS')
> +if custom_mappings:
> +for mapping in custom_mappings.split():
> +parts = mapping.split(':', 1)
> +if len(parts) == 2:
> +git_purl_handlers[parts[0]] = parts[1]
> +bb.debug(2, f"Added custom Git PURL mapping: {parts[0]} -> {parts[1]}")
> +else:
> +bb.warn(f"Invalid SPDX_GIT_PURL_MAPPINGS entry: {mapping} (expected format: domain:purl_type)")
> +
> +try:
> +parsed = urllib.parse.urlparse(git_url)
> +except Exception:
> +return None
> +
> +hostname = parsed.hostname
> +if not hostname:
> +return None
> +
> +for domain, purl_type in git_purl_handlers.items():
> +if hostname == domain:
> +path = parsed.path.strip('/')
> +path_parts = path.split('/')
> +if len(path_parts) >= 2:
> +owner = path_parts[0]
> +repo = path_parts[1].replace('.git', '')
> +return f"{purl_type}/{owner}/{repo}@{srcrev}"
> +break
> +
> +return None
> +
> +
> +def _enrich_source_package(d, dl, fd, file_name, primary_purpose):
> +"""Enrich a source download package with version, PURL, and external refs.
> +
> +Extracts version from SRCREV for Git sources, generates PURLs for
> +known hosting services, and adds external references for VCS,
> +distribution URLs, and homepage.
> +"""
> +version = None
> +purl = None
> +
> +if fd.type == "git":
> +# Use full SHA-1 from fd.revision
> +srcrev = ge

Re: [OE-core][PATCH v8 4/7] spdx30: Enrich source downloads with version and PURL

2026-03-11 Thread Joshua Watt via lists.openembedded.org
On Mon, Mar 9, 2026 at 7:29 AM  wrote:
>
> From: Stefano Tondo 
>
> Add version extraction, PURL generation, and external references
> to source download packages in SPDX 3.0 SBOMs:
>
> - Extract version from SRCREV for Git sources (full SHA-1)
> - Generate PURLs for Git sources on github.com by default
> - Support custom mappings via SPDX_GIT_PURL_MAPPINGS variable
>   (format: "domain:purl_type", split(':', 1) for parsing)
> - Use ecosystem PURLs from SPDX_PACKAGE_URLS for non-Git
> - Add VCS external references for Git downloads
> - Add distribution external references for tarball downloads
> - Parse Git URLs using urllib.parse
> - Extract logic into _generate_git_purl() and
>   _enrich_source_package() helpers
>
> The SPDX_GIT_PURL_MAPPINGS variable allows configuring PURL
> generation for self-hosted Git services (e.g., GitLab).
> github.com is always mapped to pkg:github by default.
>
> Signed-off-by: Stefano Tondo 
> ---
>  meta/classes/create-spdx-3.0.bbclass |   7 ++
>  meta/lib/oe/spdx30_tasks.py  | 122 +++
>  2 files changed, 129 insertions(+)
>
> diff --git a/meta/classes/create-spdx-3.0.bbclass b/meta/classes/create-spdx-3.0.bbclass
> index def2dacbc3..9e912b34e1 100644
> --- a/meta/classes/create-spdx-3.0.bbclass
> +++ b/meta/classes/create-spdx-3.0.bbclass
> @@ -152,6 +152,13 @@ SPDX_PACKAGE_URLS[doc] = "A space separated list of Package URLs (purls) for \
>  Override this variable to replace the default, otherwise append or prepend \
>  to add additional purls."
>
> +SPDX_GIT_PURL_MAPPINGS ??= ""
> +SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
> +mappings to configure PURL generation for Git source downloads. \
> +For example, "gitlab.example.com:pkg:gitlab" maps repositories hosted \
> +on gitlab.example.com to the pkg:gitlab PURL type. \
> +github.com is always mapped to pkg:github by default."
> +
>  IMAGE_CLASSES:append = " create-spdx-image-3.0"
>  SDK_CLASSES += "create-spdx-sdk-3.0"
>
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index c3a23d7889..1f6c84628d 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -13,6 +13,7 @@ import oe.spdx30
>  import oe.spdx_common
>  import oe.sdk
>  import os
> +import urllib.parse
>
>  from contextlib import contextmanager
>  from datetime import datetime, timezone
> @@ -377,6 +378,125 @@ def collect_dep_sources(dep_objsets, dest):
>  index_sources_by_hash(e.to, dest)
>
>
> +def _generate_git_purl(d, download_location, srcrev):
> +"""Generate a Package URL for a Git source from its download location.
> +
> +Parses the Git URL to identify the hosting service and generates the
> +appropriate PURL type. Supports github.com by default and custom
> +mappings via SPDX_GIT_PURL_MAPPINGS.
> +
> +Returns the PURL string or None if no mapping matches.
> +"""
> +if not download_location or not download_location.startswith('git+'):
> +return None
> +
> +git_url = download_location[4:]  # Remove 'git+' prefix
> +
> +# Default handler: github.com
> +git_purl_handlers = {
> +'github.com': 'pkg:github',
> +}
> +
> +# Custom PURL mappings from SPDX_GIT_PURL_MAPPINGS
> +# Format: "domain1:purl_type1 domain2:purl_type2"
> +custom_mappings = d.getVar('SPDX_GIT_PURL_MAPPINGS')
> +if custom_mappings:
> +for mapping in custom_mappings.split():
> +parts = mapping.split(':', 1)
> +if len(parts) == 2:
> +git_purl_handlers[parts[0]] = parts[1]
> +bb.debug(2, f"Added custom Git PURL mapping: {parts[0]} -> {parts[1]}")
> +else:
> +bb.warn(f"Invalid SPDX_GIT_PURL_MAPPINGS entry: {mapping} (expected format: domain:purl_type)")
> +
> +try:
> +parsed = urllib.parse.urlparse(git_url)
> +except Exception:
> +return None
> +
> +hostname = parsed.hostname
> +if not hostname:
> +return None
> +
> +for domain, purl_type in git_purl_handlers.items():
> +if hostname == domain:
> +path = parsed.path.strip('/')
> +path_parts = path.split('/')
> +if len(path_parts) >= 2:
> +owner = path_parts[0]
> +repo = path_parts[1].replace('.git', '')
> +return f"{purl_type}/{owner}/{repo}@{srcrev}"
> +break
> +
> +return None
> +
> +
> +def _enrich_source_package(d, dl, fd, file_name, primary_purpose):
> +"""Enrich a source download package with version, PURL, and external refs.
> +
> +Extracts version from SRCREV for Git sources, generates PURLs for
> +known hosting services, and adds external references for VCS,
> +distribution URLs, and homepage.
> +"""
> +version = None
> +purl = None
> +
> +if fd.type == "git":
> +# Use full SHA-1 from fd.revision
> +srcrev = ge