On Mon, Mar 9, 2026 at 7:29 AM <[email protected]> wrote:
>
> From: Stefano Tondo <[email protected]>
>
> Add version extraction, PURL generation, and external references
> to source download packages in SPDX 3.0 SBOMs:
>
> - Extract version from SRCREV for Git sources (full SHA-1)
> - Generate PURLs for Git sources on github.com by default
> - Support custom mappings via SPDX_GIT_PURL_MAPPINGS variable
> (format: "domain:purl_type", split(':', 1) for parsing)
> - Use ecosystem PURLs from SPDX_PACKAGE_URLS for non-Git
> - Add VCS external references for Git downloads
> - Add distribution external references for tarball downloads
> - Parse Git URLs using urllib.parse
> - Extract logic into _generate_git_purl() and
> _enrich_source_package() helpers
>
> The SPDX_GIT_PURL_MAPPINGS variable allows configuring PURL
> generation for self-hosted Git services (e.g., GitLab).
> github.com is always mapped to pkg:github by default.
>
> Signed-off-by: Stefano Tondo <[email protected]>
> ---
> meta/classes/create-spdx-3.0.bbclass | 7 ++
> meta/lib/oe/spdx30_tasks.py | 122 +++++++++++++++++++++++++++
> 2 files changed, 129 insertions(+)
>
> diff --git a/meta/classes/create-spdx-3.0.bbclass b/meta/classes/create-spdx-3.0.bbclass
> index def2dacbc3..9e912b34e1 100644
> --- a/meta/classes/create-spdx-3.0.bbclass
> +++ b/meta/classes/create-spdx-3.0.bbclass
> @@ -152,6 +152,13 @@ SPDX_PACKAGE_URLS[doc] = "A space separated list of Package URLs (purls) for \
> Override this variable to replace the default, otherwise append or prepend \
> to add additional purls."
>
> +SPDX_GIT_PURL_MAPPINGS ??= ""
> +SPDX_GIT_PURL_MAPPINGS[doc] = "A space separated list of domain:purl_type \
> + mappings to configure PURL generation for Git source downloads. \
> + For example, "gitlab.example.com:pkg:gitlab" maps repositories hosted \
> + on gitlab.example.com to the pkg:gitlab PURL type. \
> + github.com is always mapped to pkg:github by default."
> +
> IMAGE_CLASSES:append = " create-spdx-image-3.0"
> SDK_CLASSES += "create-spdx-sdk-3.0"
>
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index c3a23d7889..1f6c84628d 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -13,6 +13,7 @@ import oe.spdx30
> import oe.spdx_common
> import oe.sdk
> import os
> +import urllib.parse
>
> from contextlib import contextmanager
> from datetime import datetime, timezone
> @@ -377,6 +378,125 @@ def collect_dep_sources(dep_objsets, dest):
> index_sources_by_hash(e.to, dest)
>
>
> +def _generate_git_purl(d, download_location, srcrev):
> + """Generate a Package URL for a Git source from its download location.
> +
> + Parses the Git URL to identify the hosting service and generates the
> + appropriate PURL type. Supports github.com by default and custom
> + mappings via SPDX_GIT_PURL_MAPPINGS.
> +
> + Returns the PURL string or None if no mapping matches.
> + """
> + if not download_location or not download_location.startswith('git+'):
> + return None
> +
> + git_url = download_location[4:] # Remove 'git+' prefix
> +
> + # Default handler: github.com
> + git_purl_handlers = {
> + 'github.com': 'pkg:github',
> + }
> +
> + # Custom PURL mappings from SPDX_GIT_PURL_MAPPINGS
> + # Format: "domain1:purl_type1 domain2:purl_type2"
> + custom_mappings = d.getVar('SPDX_GIT_PURL_MAPPINGS')
> + if custom_mappings:
> + for mapping in custom_mappings.split():
> + parts = mapping.split(':', 1)
> + if len(parts) == 2:
> + git_purl_handlers[parts[0]] = parts[1]
> + bb.debug(2, f"Added custom Git PURL mapping: {parts[0]} -> {parts[1]}")
> + else:
> + bb.warn(f"Invalid SPDX_GIT_PURL_MAPPINGS entry: {mapping} (expected format: domain:purl_type)")
> +
> + try:
> + parsed = urllib.parse.urlparse(git_url)
> + except Exception:
> + return None
> +
> + hostname = parsed.hostname
> + if not hostname:
> + return None
> +
> + for domain, purl_type in git_purl_handlers.items():
> + if hostname == domain:
> + path = parsed.path.strip('/')
> + path_parts = path.split('/')
> + if len(path_parts) >= 2:
> + owner = path_parts[0]
> + repo = path_parts[1].replace('.git', '')
> + return f"{purl_type}/{owner}/{repo}@{srcrev}"
> + break
> +
> + return None
> +
> +
> +def _enrich_source_package(d, dl, fd, file_name, primary_purpose):
> + """Enrich a source download package with version, PURL, and external refs.
> +
> + Extracts version from SRCREV for Git sources, generates PURLs for
> + known hosting services, and adds external references for VCS,
> + distribution URLs, and homepage.
> + """
> + version = None
> + purl = None
> +
> + if fd.type == "git":
> + # Use full SHA-1 from fd.revision
> + srcrev = getattr(fd, 'revision', None)
> + if srcrev and srcrev not in {'${AUTOREV}', 'AUTOINC', 'INVALID'}:
> + version = srcrev
> +
> + # Generate PURL for Git hosting services
> + download_location = getattr(dl, 'software_downloadLocation', None)
> + if version and download_location:
> + purl = _generate_git_purl(d, download_location, version)
> + else:
> + # For non-Git sources, use recipe PV as version
> + pv = d.getVar('PV')
> + if pv and pv not in {'git', 'AUTOINC', 'INVALID', '${PV}'}:
> + version = pv
> +
> + # Use ecosystem PURL from SPDX_PACKAGE_URLS if available
> + package_urls = (d.getVar('SPDX_PACKAGE_URLS') or '').split()
> + for url in package_urls:
> + if not url.startswith('pkg:yocto'):
> + purl = url
> + break
> +
> + if version:
> + dl.software_packageVersion = version
Oh, and about this version: I'm not sure you can say that the version
of the recipe is the version of all downloaded files.
> +
> + if purl:
> + dl.software_packageUrl = purl
> +
> + # Add external references
> + download_location = getattr(dl, 'software_downloadLocation', None)
> + if download_location and isinstance(download_location, str):
> + dl.externalRef = dl.externalRef or []
> +
> + if download_location.startswith('git+'):
> + # VCS reference for Git repositories
> + git_url = download_location[4:]
> + if '@' in git_url:
> + git_url = git_url.split('@')[0]
> +
> + dl.externalRef.append(
> + oe.spdx30.ExternalRef(
> + externalRefType=oe.spdx30.ExternalRefType.vcs,
> + locator=[git_url],
> + )
> + )
> + elif download_location.startswith(('http://', 'https://', 'ftp://')):
> + # Distribution reference for tarball/archive downloads
> + dl.externalRef.append(
> + oe.spdx30.ExternalRef(
> + externalRefType=oe.spdx30.ExternalRefType.altDownloadLocation,
> + locator=[download_location],
> + )
> + )
> +
> +
> def add_download_files(d, objset):
> inputs = set()
>
> @@ -440,6 +560,8 @@ def add_download_files(d, objset):
> )
> )
>
> + _enrich_source_package(d, dl, fd, file_name, primary_purpose)
> +
> if fd.method.supports_checksum(fd):
> # TODO Need something better than hard coding this
> for checksum_id in ["sha256", "sha1"]:
> --
> 2.53.0
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#232917):
https://lists.openembedded.org/g/openembedded-core/message/232917
Mute This Topic: https://lists.openembedded.org/mt/118221139/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-