On Mon, Mar 9, 2026 at 7:29 AM <[email protected]> wrote:
>
> From: Stefano Tondo <[email protected]>
>
> Add SPDX_FILE_EXCLUDE_PATTERNS variable that allows filtering files from
> SPDX output by pattern matching. The variable accepts a space-separated
> list of patterns; files whose paths contain any pattern are excluded.

"PATTERN" implies regex to me; can we do that (comments below to show
how)? It's a lot more flexible with anchoring, case sensitivity, etc.

 e.g.:

SPDX_FILE_EXCLUDE_PATTERNS = "(?i)\\.patch$ (?i)\\.diff$"

>
> When empty (the default), no filtering is applied and all files are
> included, preserving existing behavior.
>
> This enables users to reduce SBOM size by excluding files that are not
> relevant for compliance (e.g., test files, object files, patches).
>
> When file exclusion is active, debug source lookups that reference
> filtered files are gracefully skipped instead of causing fatal errors.
>
> Signed-off-by: Stefano Tondo <[email protected]>
> ---
>  meta/classes/spdx-common.bbclass |  6 ++++++
>  meta/lib/oe/spdx30_tasks.py      | 28 ++++++++++++++++++++++++----
>  2 files changed, 30 insertions(+), 4 deletions(-)
>
> diff --git a/meta/classes/spdx-common.bbclass 
> b/meta/classes/spdx-common.bbclass
> index 3110230c9e..f54459d3b4 100644
> --- a/meta/classes/spdx-common.bbclass
> +++ b/meta/classes/spdx-common.bbclass
> @@ -54,6 +54,12 @@ SPDX_CONCLUDED_LICENSE[doc] = "The license concluded by 
> manual or external \
>
>  SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}"
>
> +SPDX_FILE_EXCLUDE_PATTERNS ??= ""
> +SPDX_FILE_EXCLUDE_PATTERNS[doc] = "Space-separated list of patterns to 
> exclude \
> +    from SPDX file output. Files whose paths contain any of these patterns 
> will \
> +    be filtered out. Defaults to empty (no filtering). Example: \
> +    SPDX_FILE_EXCLUDE_PATTERNS = '.patch .diff /test/ .pyc .o'"
> +
>  python () {
>      from oe.cve_check import extend_cve_status
>      extend_cve_status(d)
> diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
> index 99f2892dfb..5ced792d71 100644
> --- a/meta/lib/oe/spdx30_tasks.py
> +++ b/meta/lib/oe/spdx30_tasks.py
> @@ -161,6 +161,9 @@ def add_package_files(
>          compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
>          bb.debug(1, f"Total compiled files: {len(compiled_sources)}")
>
> +    # File exclusion filtering
> +    exclude_patterns = (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()

exclude_patterns = [re.compile(p) for p in
(d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()]
excluded_files = set()

> +
>      for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
>          dirs[:] = [d for d in dirs if d not in ignore_dirs]
>          if subdir == str(topdir):
> @@ -174,6 +177,13 @@ def add_package_files(
>                  continue
>
>              filename = str(filepath.relative_to(topdir))
> +
> +            # Apply file exclusion filtering
> +            if exclude_patterns:
> +                filename_lower = filename.lower()
> +                if any(pattern in filename_lower for pattern in 
> exclude_patterns):
> +                    continue

if any(p.search(filename) for p in exclude_patterns):
    excluded_files.add(filename)
    continue

> +
>              file_purposes = get_purposes(filepath)
>
>              # Check if file is compiled
> @@ -219,6 +229,8 @@ def add_package_files(
>  def get_package_sources_from_debug(
>      d, package, package_files, sources, source_hash_cache
>  ):
> +    exclude_patterns = (d.getVar("SPDX_FILE_EXCLUDE_PATTERNS") or "").split()
> +
>      def file_path_match(file_path, pkg_file):
>          if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
>              return True
> @@ -251,10 +263,18 @@ def get_package_sources_from_debug(
>              continue
>
>          if not any(file_path_match(file_path, pkg_file) for pkg_file in 
> package_files):
> -            bb.fatal(
> -                "No package file found for %s in %s; SPDX found: %s"
> -                % (str(file_path), package, " ".join(p.name for p in 
> package_files))
> -            )
> +            # When file exclusion patterns are active, some files may be 
> filtered out
> +            if exclude_patterns:
> +                bb.debug(
> +                    1,
> +                    f"Skipping debug source lookup for {file_path} in 
> {package} (file exclusion active)",
> +                )
> +                continue

Instead of assuming this, have add_package_files also return the set
of excluded files (see above), then pass that into this function for
cross-checking (the other callers of add_package_files can just ignore
the excluded files), e.g.:

  spdx_files, _ = add_package_files(....)

> +            else:
> +                bb.fatal(
> +                    "No package file found for %s in %s; SPDX found: %s"
> +                    % (str(file_path), package, " ".join(p.name for p in 
> package_files))
> +                )
>              continue
>
>          for debugsrc in file_data["debugsrc"]:
> --
> 2.53.0
>
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#232907): 
https://lists.openembedded.org/g/openembedded-core/message/232907
Mute This Topic: https://lists.openembedded.org/mt/118221135/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub 
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-

Reply via email to