From patchwork Fri Jun 13 13:16:18 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ross Burton X-Patchwork-Id: 64924 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id DFEC3C71159 for ; Fri, 13 Jun 2025 13:16:31 +0000 (UTC) Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by mx.groups.io with SMTP id smtpd.web10.10096.1749820588015741745 for ; Fri, 13 Jun 2025 06:16:28 -0700 Authentication-Results: mx.groups.io; dkim=none (message not signed); spf=pass (domain: arm.com, ip: 217.140.110.172, mailfrom: ross.burton@arm.com) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 4F0EA1C0A for ; Fri, 13 Jun 2025 06:16:07 -0700 (PDT) Received: from cesw-amp-gbt-1s-m12830-04.lab.cambridge.arm.com (usa-sjc-imap-foss1.foss.arm.com [10.121.207.14]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id 518393F59E for ; Fri, 13 Jun 2025 06:16:27 -0700 (PDT) From: Ross Burton To: openembedded-core@lists.openembedded.org Subject: [PATCH 09/10] oe/license_finder: rewrite license checksum loading, scan more licenses Date: Fri, 13 Jun 2025 14:16:18 +0100 Message-ID: <20250613131620.221912-9-ross.burton@arm.com> X-Mailer: git-send-email 2.43.0 In-Reply-To: <20250613131620.221912-1-ross.burton@arm.com> References: <20250613131620.221912-1-ross.burton@arm.com> MIME-Version: 1.0 List-Id: X-Webhook-Received: from li982-79.members.linode.com [45.33.32.79] by aws-us-west-2-korg-lkml-1.web.codeaurora.org with HTTPS for ; Fri, 13 Jun 2025 13:16:31 -0000 X-Groupsio-URL: https://lists.openembedded.org/g/openembedded-core/message/218609 Rewrite the license checksum generation and loading of CSV files to be clearer. This also expands the scan of COMMON_LICENSE_DIR to include LICENSE_PATH, which can be extended by layers to provide more license texts. Signed-off-by: Ross Burton --- meta/lib/oe/license_finder.py | 65 ++++++++++++++++------------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/meta/lib/oe/license_finder.py b/meta/lib/oe/license_finder.py index cacb4cb19d6..1bdc39e1c53 100644 --- a/meta/lib/oe/license_finder.py +++ b/meta/lib/oe/license_finder.py @@ -11,24 +11,18 @@ import os import re import bb +import bb.utils logger = logging.getLogger("BitBake.OE.LicenseFinder") -def get_license_md5sums(d): - import bb.utils +def _load_hash_csv(d): + """ + Load a mapping of (checksum: license name) from all files/license-hashes.csv + files that can be found in the available layers. + """ import csv md5sums = {} - # Gather md5sums of license files in common license dir - commonlicdir = d.getVar('COMMON_LICENSE_DIR') - for fn in os.listdir(commonlicdir): - md5value = bb.utils.md5_file(os.path.join(commonlicdir, fn)) - md5sums[md5value] = fn - - # The following were extracted from common values in various recipes - # (double checking the license against the license file itself, not just - # the LICENSE value in the recipe) - # Read license md5sums from csv file for path in d.getVar('BBPATH').split(':'): csv_path = os.path.join(path, 'files', 'license-hashes.csv') @@ -41,28 +35,28 @@ def get_license_md5sums(d): return md5sums -def crunch_known_licenses(d): - ''' - Calculate the MD5 checksums for the crunched versions of all common - licenses. Also add additional known checksums. - ''' - - crunched_md5sums = {} +def _crunch_known_licenses(d): + """ + Calculate the MD5 checksums for the original and "crunched" versions of all + known licenses. + """ + md5sums = {} - commonlicdir = d.getVar('COMMON_LICENSE_DIR') - for fn in sorted(os.listdir(commonlicdir)): - md5value = crunch_license(os.path.join(commonlicdir, fn)) - if md5value not in crunched_md5sums: - crunched_md5sums[md5value] = fn - elif fn != crunched_md5sums[md5value]: - bb.debug(2, "crunched_md5sums['%s'] is already set to '%s' rather than '%s'" % (md5value, crunched_md5sums[md5value], fn)) - else: - bb.debug(2, "crunched_md5sums['%s'] is already set to '%s'" % (md5value, crunched_md5sums[md5value])) + lic_dirs = [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or "").split() + for lic_dir in lic_dirs: + for fn in os.listdir(lic_dir): + path = os.path.join(lic_dir, fn) + # Hash the exact contents + md5value = bb.utils.md5_file(path) + md5sums[md5value] = fn + # Also hash a "crunched" version + md5value = _crunch_license(path) + md5sums[md5value] = fn - return crunched_md5sums + return md5sums -def crunch_license(licfile): +def _crunch_license(licfile): ''' Remove non-material text from a license file and then calculate its md5sum. This works well for licenses that contain a copyright statement, @@ -152,10 +146,9 @@ def find_license_files(srctree, first_only=False): def match_licenses(licfiles, srctree, d): - import bb - md5sums = get_license_md5sums(d) - - crunched_md5sums = crunch_known_licenses(d) + md5sums = {} + md5sums.update(_load_hash_csv(d)) + md5sums.update(_crunch_known_licenses(d)) licenses = [] for licfile in sorted(licfiles): @@ -163,8 +156,8 @@ def match_licenses(licfiles, srctree, d): md5value = bb.utils.md5_file(resolved_licfile) license = md5sums.get(md5value, None) if not license: - crunched_md5 = crunch_license(resolved_licfile) - license = crunched_md5sums.get(crunched_md5, None) + crunched_md5 = _crunch_license(resolved_licfile) + license = md5sums.get(crunched_md5, None) if not license: license = 'Unknown' logger.info("Please add the following line for '%s' to a 'license-hashes.csv' " \