@@ -357,6 +357,155 @@ def collect_dep_sources(dep_objsets, dest):
index_sources_by_hash(e.to, dest)
+def _strip_known_prefix(name, prefixes):
+    """
+    Return name with the first matching prefix from prefixes removed.
+
+    Prefixes are tried in order and at most one is stripped; name is
+    returned unchanged when none of them match.
+    """
+    for prefix in prefixes:
+        if name.startswith(prefix):
+            return name[len(prefix):]
+    return name
+
+
+def extract_dependency_metadata(d, file_name):
+    """
+    Extract version and generate PURL for dependency packages.
+
+    Uses recipe metadata (PV, inherited classes) to determine package ecosystem
+    rather than guessing from filenames. Only parses filenames for unambiguous
+    cases where the file extension definitively identifies the ecosystem.
+
+    The checks run in the order listed below and the first one that yields a
+    PURL wins:
+    - Rust crates (.crate extension is unambiguous)
+    - Go modules (when GO_IMPORT is set or domain pattern is explicit)
+    - PyPI packages (when recipe inherits pypi class and PYPI_PACKAGE is set)
+    - NPM packages (when recipe inherits npm class)
+    - CPAN packages (when recipe inherits cpan class)
+    - NuGet packages (when recipe inherits nuget/dotnet class)
+    - Maven packages (when recipe inherits maven class)
+    - pkg:generic/<BPN>@<PV> for everything else
+
+    Parameters:
+        d: datastore object; read through d.getVar() and
+           bb.data.inherits_class()
+        file_name: name of the downloaded file (matched against the crate and
+           Go filename patterns)
+
+    Returns: (version, purl) tuple. version is taken from the filename for
+    crates and filename-detected Go modules, otherwise from PV (None when PV
+    is unset or empty). purl is None when no PURL could be generated, in
+    which case version may still be set.
+    """
+    import re
+
+    # Get version from recipe PV (always prefer recipe metadata over filename parsing)
+    version = d.getVar("PV") or None
+
+    # Case 1: Rust crate - .crate extension is unambiguous
+    if file_name.endswith('.crate'):
+        crate_match = re.match(r'^(.+?)-(\d+\.\d+\.\d+(?:\.\d+)?(?:[-+][\w.]+)?)\.crate$', file_name)
+        if crate_match:
+            # Use filename version for crates (they embed version in filename)
+            name, version = crate_match.groups()
+            return (version, f"pkg:cargo/{name}@{version}")
+        # A .crate name that does not follow <name>-<version>.crate falls
+        # through to the metadata-based checks below.
+
+    # Case 2: Go module - check if GO_IMPORT is set (most reliable)
+    go_import = d.getVar("GO_IMPORT")
+    if go_import and version:
+        # GO_IMPORT contains the module path (e.g., github.com/containers/storage)
+        return (version, f"pkg:golang/{go_import}@{version}")
+
+    # Case 3: Go module from filename - only for explicit hosting domains with version in filename
+    # Patterns like github.com.user.repo-v1.2.3.tar.gz where the domain is explicit.
+    # The host is captured separately from the flattened path so that the dots
+    # inside the host name (github.com, go.googlesource.com) are preserved and
+    # only the path separators are converted back to slashes.
+    # NOTE(review): gopkg.com and golang.com are accepted as hosts here; these
+    # look like they were meant to be gopkg.in / golang.org - confirm.
+    go_match = re.match(
+        r'^((?:github|gitlab|gopkg|golang|go\.googlesource)\.com)\.'
+        r'([\w.]+(?:\.[\w-]+)*?)-(v?\d+\.\d+\.\d+(?:[-+][\w.]+)?)\.',
+        file_name
+    )
+    if go_match:
+        domain, flat_path, version = go_match.groups()
+        # github.com.containers.storage -> github.com/containers/storage
+        module_path = domain + '/' + flat_path.replace('.', '/')
+        return (version, f"pkg:golang/{module_path}@{version}")
+
+    # Case 4: PyPI package - check if recipe inherits pypi class
+    if bb.data.inherits_class("pypi", d) and version:
+        # Get the PyPI package name from PYPI_PACKAGE variable (handles python3- prefix removal)
+        pypi_package = d.getVar("PYPI_PACKAGE")
+        if pypi_package:
+            # Normalize package name per PEP 503
+            name = re.sub(r"[-_.]+", "-", pypi_package).lower()
+            return (version, f"pkg:pypi/{name}@{version}")
+
+    # Case 5: NPM package - check if recipe inherits npm class
+    if bb.data.inherits_class("npm", d) and version:
+        bpn = d.getVar("BPN")
+        if bpn:
+            # Remove npm- prefix if present
+            name = _strip_known_prefix(bpn, ('npm-',))
+            return (version, f"pkg:npm/{name}@{version}")
+
+    # Case 6: CPAN package - check if recipe inherits cpan class
+    if bb.data.inherits_class("cpan", d) and version:
+        bpn = d.getVar("BPN")
+        if bpn:
+            # Remove perl- or libperl- prefixes if present
+            name = _strip_known_prefix(bpn, ('perl-', 'libperl-'))
+            return (version, f"pkg:cpan/{name}@{version}")
+
+    # Case 7: NuGet package - check if recipe inherits nuget/dotnet class
+    if (bb.data.inherits_class("nuget", d) or bb.data.inherits_class("dotnet", d)) and version:
+        bpn = d.getVar("BPN")
+        if bpn:
+            # Remove dotnet- or nuget- prefix if present
+            name = _strip_known_prefix(bpn, ('dotnet-', 'nuget-'))
+            return (version, f"pkg:nuget/{name}@{version}")
+
+    # Case 8: Maven package - check if recipe inherits maven class
+    if bb.data.inherits_class("maven", d) and version:
+        # Maven PURLs require group:artifact format
+        # Check for MAVEN_GROUP_ID and MAVEN_ARTIFACT_ID variables
+        group_id = d.getVar("MAVEN_GROUP_ID")
+        artifact_id = d.getVar("MAVEN_ARTIFACT_ID")
+
+        if group_id and artifact_id:
+            # Proper Maven PURL: pkg:maven/group.id/artifact@version
+            return (version, f"pkg:maven/{group_id}/{artifact_id}@{version}")
+
+        # Fallback: use BPN as artifact name without group
+        # NOTE(review): this PURL has no namespace (group id); confirm that
+        # consumers accept a namespace-less pkg:maven PURL.
+        bpn = d.getVar("BPN")
+        if bpn:
+            # Remove maven- or java- prefix if present
+            name = _strip_known_prefix(bpn, ('maven-', 'java-'))
+            return (version, f"pkg:maven/{name}@{version}")
+
+    # Fallback: use pkg:generic for source downloads without specific ecosystem
+    # This covers C/C++ libraries and other non-ecosystem packages
+    bpn = d.getVar("BPN")
+    if version and bpn:
+        # Generic PURL for source tarballs (e.g., zlib, openssl, curl)
+        # The built package will have pkg:yocto/... PURL
+        return (version, f"pkg:generic/{bpn}@{version}")
+
+    return (version, None)
+
+
def add_download_files(d, objset):
inputs = set()
@@ -408,6 +557,9 @@ def add_download_files(d, objset):
inputs.add(file)
else:
+ # Extract version and PURL for dependency packages using recipe metadata
+ dep_version, dep_purl = extract_dependency_metadata(d, file_name)
+
dl = objset.add(
oe.spdx30.software_Package(
_id=objset.new_spdxid("source", str(download_idx + 1)),
@@ -420,6 +572,14 @@ def add_download_files(d, objset):
)
)
+ # Add version if extracted
+ if dep_version:
+ dl.software_packageVersion = dep_version
+
+ # Add PURL if generated
+ if dep_purl:
+ dl.software_packageUrl = dep_purl
+
if fd.method.supports_checksum(fd):
# TODO Need something better than hard coding this
for checksum_id in ["sha256", "sha1"]: