diff mbox series

[RFC,1/2] utils: Add write_file_manifest helper

Message ID 20240606140622.2494668-2-JPEWhacker@gmail.com
State New
Headers show
Series Write out file manifest in do_unpack | expand

Commit Message

Joshua Watt June 6, 2024, 2:03 p.m. UTC
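Adds a helper function, write_file_manifest(), that walks a directory
tree and writes out a JSON file with a manifest of its files, symlinks
and directories, optionally including a SHA-256 hash and any SPDX
license identifiers found in each file.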

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
---
 meta/lib/oe/utils.py | 67 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
diff mbox series

Patch

diff --git a/meta/lib/oe/utils.py b/meta/lib/oe/utils.py
index 14a7d07ef01..5437e7a2d9f 100644
--- a/meta/lib/oe/utils.py
+++ b/meta/lib/oe/utils.py
@@ -8,6 +8,9 @@  import subprocess
 import multiprocessing
 import traceback
 import errno
+import json
+import hashlib
+import re
 
 def read_file(filename):
     try:
@@ -540,3 +543,67 @@  def touch(filename):
         # Handle read-only file systems gracefully
         if e.errno != errno.EROFS:
             raise e
+
+
+def write_file_manifest(path, outfile, *, include_hash=True, extract_lic=False, ignore_dirs=()):
+    """
+    Walk the directory tree rooted at path and write a JSON manifest of its
+    contents to outfile.
+
+    The manifest is a dictionary with three lists:
+
+      "files":    regular entries, each with "path", "size" and "mode", plus
+                  "licenses" and/or "sha256" when requested
+      "symlinks": symbolic links (to files or to directories), each with
+                  "path", "target" and "mode"
+      "dirs":     real directories, each with "path" and "mode"
+
+    All "path" values are relative to path, and all "mode" values are the
+    st_mode reported by os.lstat(), i.e. symlinks are never followed.
+
+    Arguments:
+      path:         root of the tree to describe
+      outfile:      file the JSON manifest is written to
+      include_hash: if True, record the SHA-256 hex digest of each file
+      extract_lic:  if True, search the first 15000 bytes of each file for
+                    "SPDX-License-Identifier:" lines and record every
+                    expression found
+      ignore_dirs:  directory names (not paths) that are neither recorded
+                    nor descended into, at any depth
+    """
+    LIC_REGEX = re.compile(rb'^\W*SPDX-License-Identifier:\s*([ \w\d.()+-]+?)(?:\s+\W*)?$', re.MULTILINE)
+    # Only the head of each file is searched for license identifiers
+    LIC_SCAN_SIZE = 15000
+    HASH_CHUNK_SIZE = 65536
+
+    manifest = {
+        "files": [],
+        "symlinks": [],
+        "dirs": [],
+    }
+
+    def add_symlink(p, st):
+        manifest["symlinks"].append({
+            "path": os.path.relpath(p, path),
+            "target": os.readlink(p),
+            "mode": st.st_mode,
+        })
+
+    for root, dirs, files in os.walk(path):
+        # Prune ignored directories in place so os.walk() does not descend
+        # into them. Sorting makes the manifest independent of the order in
+        # which the file system returns directory entries.
+        dirs[:] = sorted(d for d in dirs if d not in ignore_dirs)
+
+        for fn in sorted(files):
+            p = os.path.join(root, fn)
+            st = os.lstat(p)
+            if os.path.islink(p):
+                add_symlink(p, st)
+                continue
+
+            data = {
+                "path": os.path.relpath(p, path),
+                "size": st.st_size,
+                "mode": st.st_mode,
+            }
+
+            if include_hash or extract_lic:
+                with open(p, "rb") as f:
+                    head = b""
+                    if extract_lic:
+                        head = f.read(LIC_SCAN_SIZE)
+                        licenses = LIC_REGEX.findall(head)
+                        if licenses:
+                            data["licenses"] = [lic.decode('ascii') for lic in licenses]
+
+                    # Hash only when the digest is actually recorded. The
+                    # bytes already consumed by the license scan seed the
+                    # hash so the digest still covers the whole file.
+                    if include_hash:
+                        h = hashlib.sha256(head)
+                        for chunk in iter(lambda: f.read(HASH_CHUNK_SIZE), b""):
+                            h.update(chunk)
+                        data["sha256"] = h.hexdigest()
+
+            manifest["files"].append(data)
+
+        for dn in dirs:
+            p = os.path.join(root, dn)
+            st = os.lstat(p)
+            # os.walk() reports symlinks that point at directories in dirs
+            # (without descending into them); record them as symlinks so
+            # the link target is not lost.
+            if os.path.islink(p):
+                add_symlink(p, st)
+                continue
+
+            manifest["dirs"].append({
+                "path": os.path.relpath(p, path),
+                "mode": st.st_mode,
+            })
+
+    with open(outfile, "w") as f:
+        json.dump(manifest, f)