On 8 June 2015 at 11:25, Lei Maohui <[email protected]> wrote: > The main changes are: > 1. use "curl" command instead of "wget" when get spdx file from FOSSologySPDX > instance server. > > Before apply these patches, the command is : > wget -qO - --no-check-certificate --timeout=0 > --post-file=xxx/yyy/zzz.tar.gz > http://localhost//?mod=spdx_license_once&noCopyright=${FOSS_COPYRIGHT}&recursiveUnpack=${FOSS_RECURSIVE_UNPACK} > > After apply these patches, the command is : > curl http://127.0.0.1/repo/ --noproxy 127.0.0.1 -k -F > "mod=spdx_license_once" -F "noCopyright=false" -F "jsonOutput=false" -F > "fullSPDXFlag=true" -F "file=@ xxx/yyy/zzz.tar.gz" -o xxx/yyy/zzz.spdx > > Because if use "wget" command,the Mandatory fields of the SPDX > Specification such as the following can't be obtained. > 1) PackageLicenseInfoFromFiles(Package Information) > 2) PackageLicenseDeclared(Package Information) > 3) LicenseID(License Information) > 4) ExtractedText(License Information) > 5) LicenseName(License Information) > > 2. In order to avoid the SPDX_S be polluted in the rebuild, create > ${WORKDIR}/${SPDX_TEMP_DIR} to save the source. > > 3. Add mandatory field to be compliant with the SPDX 1.2 Specification. > > Signed-off-by: Lei Maohui <[email protected]> > --- > meta/classes/spdx.bbclass | 425 > +++++++++++++++++----------------------------- > 1 file changed, 155 insertions(+), 270 deletions(-) > > diff --git a/meta/classes/spdx.bbclass b/meta/classes/spdx.bbclass > index 454c53e..09584af 100644 > --- a/meta/classes/spdx.bbclass > +++ b/meta/classes/spdx.bbclass > @@ -15,178 +15,191 @@ > # SPDX file will be output to the path which is defined as[SPDX_MANIFEST_DIR] > # in ./meta/conf/licenses.conf. > > +SPDXOUTPUTDIR = "${WORKDIR}/spdx_output_dir" > SPDXSSTATEDIR = "${WORKDIR}/spdx_sstate_dir" > > # If ${S} isn't actually the top-level source directory, set SPDX_S to point > at > # the real top-level directory. 
> + > SPDX_S ?= "${S}" > > python do_spdx () { > import os, sys > - import json, shutil > + import json > + > + #The source of gcc is too large to get it's spdx.So,give up. > + bpn = d.getVar('BPN', True) > + if ((bpn == "gcc") or (bpn == "libgcc")): > + return None > > info = {} > info['workdir'] = d.getVar('WORKDIR', True) > - info['sourcedir'] = d.getVar('SPDX_S', True) > info['pn'] = d.getVar('PN', True) > info['pv'] = d.getVar('PV', True) > + info['package_download_location'] = d.getVar('SRC_URI', True) > + if info['package_download_location'] != "": > + info['package_download_location'] = > info['package_download_location'].split()[0] > info['spdx_version'] = d.getVar('SPDX_VERSION', True) > info['data_license'] = d.getVar('DATA_LICENSE', True) > + info['creator'] = {} > + info['creator']['Tool'] = d.getVar('CREATOR_TOOL', True) > + info['license_list_version'] = d.getVar('LICENSELISTVERSION', True) > + info['package_homepage'] = d.getVar('HOMEPAGE', True) > + info['package_summary'] = d.getVar('SUMMARY', True) > > - sstatedir = d.getVar('SPDXSSTATEDIR', True) > - sstatefile = os.path.join(sstatedir, info['pn'] + info['pv'] + ".spdx") > - > + spdx_sstate_dir = d.getVar('SPDXSSTATEDIR', True) > manifest_dir = d.getVar('SPDX_MANIFEST_DIR', True) > - info['outfile'] = os.path.join(manifest_dir, info['pn'] + ".spdx" ) > - > - info['spdx_temp_dir'] = d.getVar('SPDX_TEMP_DIR', True) > - info['tar_file'] = os.path.join(info['workdir'], info['pn'] + ".tar.gz" ) > - > + info['outfile'] = os.path.join(manifest_dir, info['pn'] + "-" + > info['pv'] + ".spdx") > + sstatefile = os.path.join(spdx_sstate_dir, > + info['pn'] + "-" + info['pv'] + ".spdx" ) > + info['tar_file'] = os.path.join(info['workdir'], info['pn'] + ".tar.gz") > + > # Make sure important dirs exist > try: > bb.utils.mkdirhier(manifest_dir) > - bb.utils.mkdirhier(sstatedir) > - bb.utils.mkdirhier(info['spdx_temp_dir']) > + bb.utils.mkdirhier(spdx_sstate_dir) > except OSError as e: > bb.error("SPDX: Could 
not set up required directories: " + str(e)) > return > > ## get everything from cache. use it to decide if > - ## something needs to be rerun > - cur_ver_code = get_ver_code(info['sourcedir']) > + ## something needs to be rerun > + d.setVar('WORKDIR', d.getVar('SPDX_TEMP_DIR', True)) > + info['sourcedir'] = d.getVar('SPDX_S', True) > + cur_ver_code = get_ver_code(info['sourcedir']).split()[0] > cache_cur = False > if os.path.exists(sstatefile): > ## cache for this package exists. read it in > cached_spdx = get_cached_spdx(sstatefile) > - > - if cached_spdx['PackageVerificationCode'] == cur_ver_code: > - bb.warn("SPDX: Verification code for " + info['pn'] > - + "is same as cache's. do nothing") > + if cached_spdx: > + cached_spdx = cached_spdx.split()[0] > + if (cached_spdx == cur_ver_code): > + bb.warn(info['pn'] + "'s ver code same as cache's. do nothing") > cache_cur = True > - else: > - local_file_info = setup_foss_scan(info, True, > cached_spdx['Files']) > - else: > - local_file_info = setup_foss_scan(info, False, None) > - > - if cache_cur: > - spdx_file_info = cached_spdx['Files'] > - foss_package_info = cached_spdx['Package'] > - foss_license_info = cached_spdx['Licenses'] > - else: > + create_manifest(info,sstatefile) > + if not cache_cur: > ## setup fossology command > foss_server = d.getVar('FOSS_SERVER', True) > - foss_flags = d.getVar('FOSS_WGET_FLAGS', True) > - foss_full_spdx = d.getVar('FOSS_FULL_SPDX', True) == "true" or False > - foss_command = "wget %s --post-file=%s %s"\ > - % (foss_flags, info['tar_file'], foss_server) > - > - foss_result = run_fossology(foss_command, foss_full_spdx) > - if foss_result is not None: > - (foss_package_info, foss_file_info, foss_license_info) = > foss_result > - spdx_file_info = create_spdx_doc(local_file_info, foss_file_info) > - ## write to cache > - write_cached_spdx(sstatefile, cur_ver_code, foss_package_info, > - spdx_file_info, foss_license_info) > + foss_flags = d.getVar('FOSS_CURL_FLAGS', True) > + 
foss_command = "curl %s -k %s -F \"file=@%s\" -o %s"\ > + % (foss_server,foss_flags,info['tar_file'],sstatefile) > + > + #get the source tarball for fossy_scan > + setup_foss_scan(info) > + #get spdx file from fossylogy server > + run_fossology(foss_command) > + if get_cached_spdx(sstatefile) != None: > + write_cached_spdx(info,sstatefile,cur_ver_code) > + ## CREATE MANIFEST(write to outfile ) > + create_manifest(info,sstatefile) > else: > - bb.error("SPDX: Could not communicate with FOSSology server. > Command was: " + foss_command) > - return > - > - ## Get document and package level information > - spdx_header_info = get_header_info(info, cur_ver_code, foss_package_info) > - > - ## CREATE MANIFEST > - create_manifest(info, spdx_header_info, spdx_file_info, > foss_license_info) > - > - ## clean up the temp stuff > - shutil.rmtree(info['spdx_temp_dir'], ignore_errors=True) > + bb.warn('Can\'t get the spdx file' + info['pn'] + '. Please > check your fossylogy server.') > if os.path.exists(info['tar_file']): > remove_file(info['tar_file']) > + d.setVar('WORKDIR', info['workdir']) > +} > +#Get the src after do_patch. > +python do_get_spdx_s() { > + import shutil > + #The source of gcc is too large to get it's spdx.So,give up. > + bpn = d.getVar('BPN', True) > + if ((bpn == "gcc") or (bpn == "libgcc")): > + return None > + # Change the WORKDIR to make do_unpack do_patch run in another dir. > + d.setVar('WORKDIR', d.getVar('SPDX_TEMP_DIR', True)) > + # The changed 'WORKDIR' also casued 'B' changed, create dir 'B' for the > + # possibly requiring of the following tasks (such as some recipes's > + # do_patch required 'B' existed). 
> + bb.utils.mkdirhier(d.getVar('B', True)) > + > + # The kernel source is ready after do_validate_branches > + if bb.data.inherits_class('kernel-yocto', d): > + shutil.copytree(d.getVar('S', True), d.getVar('WORKDIR', True) + > "/kernel-source") > + return None > + else: > + bb.build.exec_func('do_unpack', d) > + # The S of the gcc source is work-share > + if ((bpn == "gcc") or (bpn == "libgcc")): > + d.setVar('S', d.getVar('WORKDIR', True) + "/gcc-" + d.getVar('PV', > True)) > + bb.build.exec_func('do_patch', d) > } > -addtask spdx after do_patch before do_configure > - > -def create_manifest(info, header, files, licenses): > - import codecs > - with codecs.open(info['outfile'], mode='w', encoding='utf-8') as f: > - # Write header > - f.write(header + '\n') > > - # Write file data > - for chksum, block in files.iteritems(): > - f.write("FileName: " + block['FileName'] + '\n') > - for key, value in block.iteritems(): > - if not key == 'FileName': > - f.write(key + ": " + value + '\n') > - f.write('\n') > +addtask get_spdx_s after do_patch before do_configure > +addtask spdx after do_get_spdx_s before do_configure > > - # Write license data > - for id, block in licenses.iteritems(): > - f.write("LicenseID: " + id + '\n') > - for key, value in block.iteritems(): > - f.write(key + ": " + value + '\n') > - f.write('\n') > +def create_manifest(info,sstatefile): > + import shutil > + shutil.copyfile(sstatefile,info['outfile']) > > def get_cached_spdx(sstatefile): > - import json > - import codecs > - cached_spdx_info = {} > - with codecs.open(sstatefile, mode='r', encoding='utf-8') as f: > - try: > - cached_spdx_info = json.load(f) > - except ValueError as e: > - cached_spdx_info = None > - return cached_spdx_info > + import subprocess > + if not os.path.exists(sstatefile): > + return None > + > + try: > + output = subprocess.check_output(['grep', "PackageVerificationCode", > sstatefile]) > + except subprocess.CalledProcessError as e: > + return None > + 
cached_spdx_info=output.split(': ') > + return cached_spdx_info[1] > > -def write_cached_spdx(sstatefile, ver_code, package_info, files, > license_info): > - import json > - import codecs > - spdx_doc = {} > - spdx_doc['PackageVerificationCode'] = ver_code > - spdx_doc['Files'] = {} > - spdx_doc['Files'] = files > - spdx_doc['Package'] = {} > - spdx_doc['Package'] = package_info > - spdx_doc['Licenses'] = {} > - spdx_doc['Licenses'] = license_info > - with codecs.open(sstatefile, mode='w', encoding='utf-8') as f: > - f.write(json.dumps(spdx_doc)) > +#add necessary information into spdx file > +def write_cached_spdx(info,sstatefile, ver_code): > + import subprocess > > -def setup_foss_scan(info, cache, cached_files): > - import errno, shutil > - import tarfile > - file_info = {} > - cache_dict = {} > + def sed_replace(dest_sed_cmd,key_word,replace_info): > + dest_sed_cmd = dest_sed_cmd + "-e 's#^" + key_word + ".*#" + \ > + key_word + replace_info + "#' " > + return dest_sed_cmd > > - for f_dir, f in list_files(info['sourcedir']): > - full_path = os.path.join(f_dir, f) > - abs_path = os.path.join(info['sourcedir'], full_path) > - dest_dir = os.path.join(info['spdx_temp_dir'], f_dir) > - dest_path = os.path.join(info['spdx_temp_dir'], full_path) > + def sed_insert(dest_sed_cmd,key_word,new_line): > + dest_sed_cmd = dest_sed_cmd + "-e '/^" + key_word \ > + + r"/a\\" + new_line + "' " > + return dest_sed_cmd > > - checksum = hash_file(abs_path) > - if not checksum is None: > - file_info[checksum] = {} > - ## retain cache information if it exists > - if cache and checksum in cached_files: > - file_info[checksum] = cached_files[checksum] > - ## have the file included in what's sent to the FOSSology server > - else: > - file_info[checksum]['FileName'] = full_path > - try: > - bb.utils.mkdirhier(dest_dir) > - shutil.copyfile(abs_path, dest_path) > - except OSError as e: > - bb.warn("SPDX: mkdirhier failed: " + str(e)) > - except shutil.Error as e: > - bb.warn("SPDX: 
copyfile failed: " + str(e)) > - except IOError as e: > - bb.warn("SPDX: copyfile failed: " + str(e)) > - else: > - bb.warn("SPDX: Could not get checksum for file: " + f) > + ## document level information > + sed_cmd = r"sed -i -e 's#\r$##g' " > + sed_cmd = sed_replace(sed_cmd,"SPDXVersion: ",info['spdx_version']) > + spdx_DocumentComment = "<text>SPDX for " + info['pn'] + " version " \ > + + info['pv'] + "</text>" > + sed_cmd = sed_replace(sed_cmd,"DocumentComment",spdx_DocumentComment) > > - with tarfile.open(info['tar_file'], "w:gz") as tar: > - tar.add(info['spdx_temp_dir'], > arcname=os.path.basename(info['spdx_temp_dir'])) > + ## Creator information > + sed_cmd = sed_replace(sed_cmd,"Creator: Tool: ",info['creator']['Tool']) > + sed_cmd = sed_insert(sed_cmd,"CreatorComment: ","LicenseListVersion: " + > info['license_list_version']) > + > + ## package level information > + sed_cmd = sed_replace(sed_cmd,"PackageName: ",info['pn']) > + sed_cmd = sed_replace(sed_cmd,"PackageVersion: ",info['pv']) > + sed_cmd = sed_replace(sed_cmd,"PackageDownloadLocation: > ",info['package_download_location']) > + sed_cmd = sed_insert(sed_cmd,"PackageChecksum: ","PackageHomePage: " + > info['package_homepage']) > + sed_cmd = sed_replace(sed_cmd,"PackageSummary: ","<text>" + > info['package_summary'] + "</text>") > + sed_cmd = sed_replace(sed_cmd,"PackageFileName: > ",os.path.basename(info['tar_file'])) > + sed_cmd = sed_replace(sed_cmd,"PackageVerificationCode: ",ver_code) > + sed_cmd = sed_replace(sed_cmd,"PackageDescription: ", > + "<text>" + info['pn'] + " version " + info['pv'] + "</text>") > + sed_cmd = sed_cmd + sstatefile > + > + subprocess.call("%s" % sed_cmd, shell=True) > + > +#archive the SPDX_S for get spdx file from fossylogy server > +def setup_foss_scan(info): > + import tarfile,os > + srcdir = info['sourcedir'].rstrip('/') > + dirname = os.path.dirname(srcdir) > + basename = os.path.basename(srcdir) > + os.chdir(dirname) > + tar = tarfile.open(info['tar_file'], 
'w:gz') > + tar.add(basename) > + tar.close() > + > > - return file_info > +def remove_dir_tree(dir_name): > + import shutil > + try: > + shutil.rmtree(dir_name) > + except: > + pass > > def remove_file(file_name): > try: > @@ -203,12 +216,14 @@ def list_files(dir): > > def hash_file(file_name): > try: > - with open(file_name, 'rb') as f: > - data_string = f.read() > - sha1 = hash_string(data_string) > - return sha1 > + f = open(file_name, 'rb') > + data_string = f.read() > except: > - return None > + return None > + finally: > + f.close() > + sha1 = hash_string(data_string) > + return sha1 > > def hash_string(data): > import hashlib > @@ -216,150 +231,20 @@ def hash_string(data): > sha1.update(data) > return sha1.hexdigest() > > -def run_fossology(foss_command, full_spdx): > - import string, re > - import subprocess > - > - p = subprocess.Popen(foss_command.split(), > - stdout=subprocess.PIPE, stderr=subprocess.PIPE) > - foss_output, foss_error = p.communicate() > - if p.returncode != 0: > - return None > - > - foss_output = unicode(foss_output, "utf-8") > - foss_output = string.replace(foss_output, '\r', '') > - > - # Package info > - package_info = {} > - if full_spdx: > - # All mandatory, only one occurance > - package_info['PackageCopyrightText'] = > re.findall('PackageCopyrightText: (.*?</text>)', foss_output, re.S)[0] > - package_info['PackageLicenseDeclared'] = > re.findall('PackageLicenseDeclared: (.*)', foss_output)[0] > - package_info['PackageLicenseConcluded'] = > re.findall('PackageLicenseConcluded: (.*)', foss_output)[0] > - # These may be more than one > - package_info['PackageLicenseInfoFromFiles'] = > re.findall('PackageLicenseInfoFromFiles: (.*)', foss_output) > - else: > - DEFAULT = "NOASSERTION" > - package_info['PackageCopyrightText'] = "<text>" + DEFAULT + "</text>" > - package_info['PackageLicenseDeclared'] = DEFAULT > - package_info['PackageLicenseConcluded'] = DEFAULT > - package_info['PackageLicenseInfoFromFiles'] = [] > - > - # File info 
> - file_info = {} > - records = [] > - # FileName is also in PackageFileName, so we match on FileType as well. > - records = re.findall('FileName:.*?FileType:.*?</text>', foss_output, > re.S) > - for rec in records: > - chksum = re.findall('FileChecksum: SHA1: (.*)\n', rec)[0] > - file_info[chksum] = {} > - file_info[chksum]['FileCopyrightText'] = > re.findall('FileCopyrightText: ' > - + '(.*?</text>)', rec, re.S )[0] > - fields = ['FileName', 'FileType', 'LicenseConcluded', > 'LicenseInfoInFile'] > - for field in fields: > - file_info[chksum][field] = re.findall(field + ': (.*)', rec)[0] > - > - # Licenses > - license_info = {} > - licenses = [] > - licenses = re.findall('LicenseID:.*?LicenseName:.*?\n', foss_output, > re.S) > - for lic in licenses: > - license_id = re.findall('LicenseID: (.*)\n', lic)[0] > - license_info[license_id] = {} > - license_info[license_id]['ExtractedText'] = > re.findall('ExtractedText: (.*?</text>)', lic, re.S)[0] > - license_info[license_id]['LicenseName'] = re.findall('LicenseName: > (.*)', lic)[0] > - > - return (package_info, file_info, license_info) > - > -def create_spdx_doc(file_info, scanned_files): > - import json > - ## push foss changes back into cache > - for chksum, lic_info in scanned_files.iteritems(): > - if chksum in file_info: > - file_info[chksum]['FileType'] = lic_info['FileType'] > - file_info[chksum]['FileChecksum: SHA1'] = chksum > - file_info[chksum]['LicenseInfoInFile'] = > lic_info['LicenseInfoInFile'] > - file_info[chksum]['LicenseConcluded'] = > lic_info['LicenseConcluded'] > - file_info[chksum]['FileCopyrightText'] = > lic_info['FileCopyrightText'] > - else: > - bb.warn("SPDX: " + lic_info['FileName'] + " : " + chksum > - + " : is not in the local file info: " > - + json.dumps(lic_info, indent=1)) > - return file_info > +def run_fossology(foss_command): > + import subprocess > + subprocess.call(foss_command, shell=True) > > def get_ver_code(dirname): > chksums = [] > for f_dir, f in list_files(dirname): > 
- hash = hash_file(os.path.join(dirname, f_dir, f)) > - if not hash is None: > - chksums.append(hash) > - else: > - bb.warn("SPDX: Could not hash file: " + path) > + try: > + stats = os.stat(os.path.join(dirname,f_dir,f)) > + except OSError as e: > + bb.warn("Stat failed" + str(e) + "\n") > + continue > + chksums.append(hash_file(os.path.join(dirname,f_dir,f))) > ver_code_string = ''.join(chksums).lower() > ver_code = hash_string(ver_code_string) > return ver_code > > -def get_header_info(info, spdx_verification_code, package_info): > - """ > - Put together the header SPDX information. > - Eventually this needs to become a lot less > - of a hardcoded thing. > - """ > - from datetime import datetime > - import os > - head = [] > - DEFAULT = "NOASSERTION" > - > - package_checksum = hash_file(info['tar_file']) > - if package_checksum is None: > - package_checksum = DEFAULT > - > - ## document level information > - head.append("## SPDX Document Information") > - head.append("SPDXVersion: " + info['spdx_version']) > - head.append("DataLicense: " + info['data_license']) > - head.append("DocumentComment: <text>SPDX for " > - + info['pn'] + " version " + info['pv'] + "</text>") > - head.append("") > - > - ## Creator information > - ## Note that this does not give time in UTC. > - now = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') > - head.append("## Creation Information") > - ## Tools are supposed to have a version, but FOSSology+SPDX provides > none. 
> - head.append("Creator: Tool: FOSSology+SPDX") > - head.append("Created: " + now) > - head.append("CreatorComment: <text>UNO</text>") > - head.append("") > - > - ## package level information > - head.append("## Package Information") > - head.append("PackageName: " + info['pn']) > - head.append("PackageVersion: " + info['pv']) > - head.append("PackageFileName: " + os.path.basename(info['tar_file'])) > - head.append("PackageSupplier: Person:" + DEFAULT) > - head.append("PackageDownloadLocation: " + DEFAULT) > - head.append("PackageSummary: <text></text>") > - head.append("PackageOriginator: Person:" + DEFAULT) > - head.append("PackageChecksum: SHA1: " + package_checksum) > - head.append("PackageVerificationCode: " + spdx_verification_code) > - head.append("PackageDescription: <text>" + info['pn'] > - + " version " + info['pv'] + "</text>") > - head.append("") > - head.append("PackageCopyrightText: " > - + package_info['PackageCopyrightText']) > - head.append("") > - head.append("PackageLicenseDeclared: " > - + package_info['PackageLicenseDeclared']) > - head.append("PackageLicenseConcluded: " > - + package_info['PackageLicenseConcluded']) > - > - for licref in package_info['PackageLicenseInfoFromFiles']: > - head.append("PackageLicenseInfoFromFiles: " + licref) > - head.append("") > - > - ## header for file level > - head.append("## File Information") > - head.append("") > - > - return '\n'.join(head) > -- > 1.8.4.2 > > -- > _______________________________________________ > Openembedded-core mailing list > [email protected] > http://lists.openembedded.org/mailman/listinfo/openembedded-core
A few comments here. This *looks* OK, but I don't have a public FOSSology server to play with, so I really can't test it. Can you work with me off-list to get access to one, or to set one up? I guess my one main comment is that I would have liked this to have been spread across a few commits. There are a lot of functional changes here that should probably have been split into around 2-3 commits. -b -- Elizabeth Flanagan Yocto Project Build and Release -- _______________________________________________ Openembedded-core mailing list [email protected] http://lists.openembedded.org/mailman/listinfo/openembedded-core
