From: Stefano Tondo <[email protected]>
Use recipe metadata (PV, inherited classes) to determine package ecosystem
and version instead of unreliable filename parsing.
Previous implementation used greedy regex patterns matching any
name-version.tar.gz file, causing false positives:
zlib-1.3.1.tar.gz → pkg:pypi/zlib (WRONG - zlib is not from PyPI)
Changes:
- Use d.getVar("PV") for version by default (addresses review feedback); Rust crate and Go module filenames that embed a version keep the filename version
- Determine ecosystem via inherits_class() checks (pypi, npm, cpan, etc.)
- Only parse filenames for unambiguous cases (.crate extension, Go module archives named after an explicit hosting domain)
- Support all major ecosystems: Rust, Go, PyPI, NPM, CPAN, NuGet, Maven
- Use pkg:generic for C/C++ libraries and other non-ecosystem sources
Example results:
- zlib source: pkg:generic/[email protected]
- zlib built package: pkg:yocto/core/[email protected]
- Python with pypi class: pkg:pypi/[email protected]
- Rust crate: pkg:cargo/[email protected]
This approach aligns with Yocto's metadata system and ensures every
source download gets a PURL for supply chain tracking.
Signed-off-by: Stefano Tondo <[email protected]>
---
meta/lib/oe/spdx30_tasks.py | 160 ++++++++++++++++++++++++++++++++++++
1 file changed, 160 insertions(+)
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
index 86430c7008..c685b649b3 100644
--- a/meta/lib/oe/spdx30_tasks.py
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -357,6 +357,155 @@ def collect_dep_sources(dep_objsets, dest):
index_sources_by_hash(e.to, dest)
+def extract_dependency_metadata(d, file_name):
+ """
+ Extract version and generate PURL for dependency packages.
+
+ Uses recipe metadata (PV, inherited classes) to determine package ecosystem
+ rather than guessing from filenames. Only parses filenames for unambiguous
+ cases where the file extension definitively identifies the ecosystem.
+
+ Supported ecosystems:
+ - Rust crates (.crate extension is unambiguous)
+ - Go modules (when GO_IMPORT is set or domain pattern is explicit)
+ - PyPI packages (when recipe inherits pypi class)
+ - NPM packages (when recipe inherits npm class)
+ - CPAN packages (when recipe inherits cpan class)
+ - NuGet packages (when recipe inherits nuget/dotnet class)
+ - Maven packages (when recipe inherits maven class)
+
+ Returns: (version, purl) tuple, or (None, None) if cannot determine
+ """
+ import re
+
+ # Get version from recipe PV (always prefer recipe metadata over filename
parsing)
+ pv = d.getVar("PV")
+ version = pv if pv else None
+ purl = None
+
+ # Case 1: Rust crate - .crate extension is unambiguous
+ if file_name.endswith('.crate'):
+ crate_match =
re.match(r'^(.+?)-(\d+\.\d+\.\d+(?:\.\d+)?(?:[-+][\w.]+)?)\.crate$', file_name)
+ if crate_match:
+ name = crate_match.group(1)
+ # Use filename version for crates (they embed version in filename)
+ version = crate_match.group(2)
+ purl = f"pkg:cargo/{name}@{version}"
+ return (version, purl)
+
+ # Case 2: Go module - check if GO_IMPORT is set (most reliable)
+ go_import = d.getVar("GO_IMPORT")
+ if go_import and version:
+ # GO_IMPORT contains the module path (e.g.,
github.com/containers/storage)
+ purl = f"pkg:golang/{go_import}@{version}"
+ return (version, purl)
+
+ # Case 3: Go module from filename - only for explicit hosting domains with
version in filename
+ # Patterns like github.com.user.repo-v1.2.3.tar.gz where the domain is
explicit
+ go_match = re.match(
+
r'^((?:github|gitlab|gopkg|golang|go\.googlesource)\.com\.[\w.]+(?:\.[\w-]+)*?)-(v?\d+\.\d+\.\d+(?:[-+][\w.]+)?)\.',
+ file_name
+ )
+ if go_match:
+ # Convert dots to slashes for proper Go module path
+ # github.com.containers.storage → github.com/containers/storage
+ module_path = go_match.group(1).replace('.', '/', 1) # First dot only
+ parts = module_path.split('/', 1)
+ if len(parts) == 2:
+ domain = parts[0]
+ path = parts[1].replace('.', '/')
+ module_path = f"{domain}/{path}"
+
+ version = go_match.group(2)
+ purl = f"pkg:golang/{module_path}@{version}"
+ return (version, purl)
+
+ # Case 4: PyPI package - check if recipe inherits pypi class
+ if bb.data.inherits_class("pypi", d) and version:
+ # Get the PyPI package name from PYPI_PACKAGE variable (handles
python3- prefix removal)
+ pypi_package = d.getVar("PYPI_PACKAGE")
+ if pypi_package:
+ # Normalize package name per PEP 503
+ name = re.sub(r"[-_.]+", "-", pypi_package).lower()
+ purl = f"pkg:pypi/{name}@{version}"
+ return (version, purl)
+
+ # Case 5: NPM package - check if recipe inherits npm class
+ if bb.data.inherits_class("npm", d) and version:
+ # Get package name from recipe
+ bpn = d.getVar("BPN")
+ if bpn:
+ # Remove npm- prefix if present
+ name = bpn[4:] if bpn.startswith('npm-') else bpn
+ purl = f"pkg:npm/{name}@{version}"
+ return (version, purl)
+
+ # Case 6: CPAN package - check if recipe inherits cpan class
+ if bb.data.inherits_class("cpan", d) and version:
+ # Get package name from recipe
+ bpn = d.getVar("BPN")
+ if bpn:
+ # Remove perl- or libperl- prefixes if present
+ if bpn.startswith('perl-'):
+ name = bpn[5:]
+ elif bpn.startswith('libperl-'):
+ name = bpn[8:]
+ else:
+ name = bpn
+ purl = f"pkg:cpan/{name}@{version}"
+ return (version, purl)
+
+ # Case 7: NuGet package - check if recipe inherits nuget/dotnet class
+ if (bb.data.inherits_class("nuget", d) or bb.data.inherits_class("dotnet",
d)) and version:
+ bpn = d.getVar("BPN")
+ if bpn:
+ # Remove dotnet- or nuget- prefix if present
+ if bpn.startswith('dotnet-'):
+ name = bpn[7:]
+ elif bpn.startswith('nuget-'):
+ name = bpn[6:]
+ else:
+ name = bpn
+ purl = f"pkg:nuget/{name}@{version}"
+ return (version, purl)
+
+ # Case 8: Maven package - check if recipe inherits maven class
+ if bb.data.inherits_class("maven", d) and version:
+ # Maven PURLs require group:artifact format
+ # Check for MAVEN_GROUP_ID and MAVEN_ARTIFACT_ID variables
+ group_id = d.getVar("MAVEN_GROUP_ID")
+ artifact_id = d.getVar("MAVEN_ARTIFACT_ID")
+
+ if group_id and artifact_id:
+ # Proper Maven PURL: pkg:maven/group.id/artifact@version
+ purl = f"pkg:maven/{group_id}/{artifact_id}@{version}"
+ return (version, purl)
+ else:
+ # Fallback: use BPN as artifact name without group
+ bpn = d.getVar("BPN")
+ if bpn:
+ # Remove maven- or java- prefix if present
+ if bpn.startswith('maven-'):
+ name = bpn[6:]
+ elif bpn.startswith('java-'):
+ name = bpn[5:]
+ else:
+ name = bpn
+ purl = f"pkg:maven/{name}@{version}"
+ return (version, purl)
+
+ # Fallback: use pkg:generic for source downloads without specific ecosystem
+ # This covers C/C++ libraries and other non-ecosystem packages
+ bpn = d.getVar("BPN")
+ if version and bpn:
+ # Generic PURL for source tarballs (e.g., zlib, openssl, curl)
+ # The built package will have pkg:yocto/... PURL
+ purl = f"pkg:generic/{bpn}@{version}"
+ return (version, purl)
+
+ return (version, None)
+
+
def add_download_files(d, objset):
inputs = set()
@@ -408,6 +557,9 @@ def add_download_files(d, objset):
inputs.add(file)
else:
+ # Extract version and PURL for dependency packages using recipe
metadata
+ dep_version, dep_purl = extract_dependency_metadata(d, file_name)
+
dl = objset.add(
oe.spdx30.software_Package(
_id=objset.new_spdxid("source", str(download_idx + 1)),
@@ -420,6 +572,14 @@ def add_download_files(d, objset):
)
)
+ # Add version if extracted
+ if dep_version:
+ dl.software_packageVersion = dep_version
+
+ # Add PURL if generated
+ if dep_purl:
+ dl.software_packageUrl = dep_purl
+
if fd.method.supports_checksum(fd):
# TODO Need something better than hard coding this
for checksum_id in ["sha256", "sha1"]:
--
2.52.0
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#229022):
https://lists.openembedded.org/g/openembedded-core/message/229022
Mute This Topic: https://lists.openembedded.org/mt/117138939/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-