On 1/28/22 4:03 PM, Saul Wold wrote:
This patch will read the beginning of source files and try to find
the SPDX-License-Identifier to populate the licenseInfoInFiles
field for each source file. This does not populate licenseConcluded
at this time, nor does it roll the result up to the package level.
Signed-off-by: Saul Wold <[email protected]>
---
classes/create-spdx.bbclass | 25 +++++++++++++++++++++++++
lib/oe/spdx.py | 2 +-
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/classes/create-spdx.bbclass b/classes/create-spdx.bbclass
index 180d667..9c11945 100644
--- a/classes/create-spdx.bbclass
+++ b/classes/create-spdx.bbclass
@@ -30,6 +30,21 @@ SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"
do_image_complete[depends] = "virtual/kernel:do_create_spdx"
+def extract_licenses(filename):
+ import re
+ lic_regex = re.compile('SPDX-License-Identifier:\s+([-A-Za-z\d. ]+)[
|\n|\r\n]*?')
+
+ try:
+ with open(filename, 'r') as f:
+ size = min(15000, os.stat(filename).st_size)
+ txt = f.read(size)
+ licenses = re.findall(lic_regex, txt)
+ if licenses:
+ return licenses
+ except Exception as e:
+ bb.warn(f"Exception on {filename}: {e}")
+ return None
+
def get_doc_namespace(d, doc):
import uuid
namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS,
d.getVar("SPDX_UUID_NAMESPACE"))
@@ -232,6 +247,16 @@ def add_package_files(d, doc, spdx_pkg, topdir,
get_spdxid, get_types, *, archiv
checksumValue=bb.utils.sha256_file(filepath),
))
+ if "SOURCES" in spdx_file.fileTypes:
+ licenses = extract_licenses(filepath)
+ if licenses is not None:
+ for lic in licenses:
+ spdx_file.licenseInfoInFiles.append(lic.strip())
+ else:
+ spdx_file.licenseInfoInFiles.append("NOASSERTATION")
(Typo in the line above: this should be "NOASSERTION".)
+ else:
+ spdx_file.licenseInfoInFiles.append("NOASSERTATION")
(Typo in the line above: this should be "NOASSERTION".)
+
doc.files.append(spdx_file)
doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
spdx_pkg.hasFiles.append(spdx_file.SPDXID)
diff --git a/lib/oe/spdx.py b/lib/oe/spdx.py
index 9e7ced5..71e7c1c 100644
--- a/lib/oe/spdx.py
+++ b/lib/oe/spdx.py
@@ -236,7 +236,7 @@ class SPDXFile(SPDXObject):
fileName = _String()
licenseConcluded = _String(default="NOASSERTION")
copyrightText = _String(default="NOASSERTION")
- licenseInfoInFiles = _StringList(default=["NOASSERTION"])
+ licenseInfoInFiles = _StringList()
It's required to have "NOASSERTION" as the default if you don't do
anything, so we shouldn't change the default here (by and large, this
file should capture the spec over our use of it).
It's on my TODO list to make the "default" lists behave like default
scalars, where appending replaces the default instead of appending to
it, but I haven't gotten there yet; it hasn't come up as a problem before.
Probably need to do something like:
license_info_from_files = []
# scan files here
if license_info_from_files:
spdx_file.licenseInfoInFiles = license_info_from_files
checksums = _ObjectList(SPDXChecksum)
fileTypes = _StringList()
-=-=-=-=-=-=-=-=-=-=-=-
Links: You receive all messages sent to this group.
View/Reply Online (#161085):
https://lists.openembedded.org/g/openembedded-core/message/161085
Mute This Topic: https://lists.openembedded.org/mt/88756042/21656
Group Owner: [email protected]
Unsubscribe: https://lists.openembedded.org/g/openembedded-core/unsub
[[email protected]]
-=-=-=-=-=-=-=-=-=-=-=-