@@ -31,6 +31,7 @@ tests = ["bb.tests.codeparser",
"bb.tests.runqueue",
"bb.tests.siggen",
"bb.tests.utils",
+ "bb.tests.trace_base",
"bb.tests.compression",
"hashserv.tests",
"layerindexlib.tests.layerindexobj",
@@ -28,6 +28,8 @@ import bb.checksum
import bb.process
import bb.event
+from .trace_base import TraceUnpackBase
+
__version__ = "2"
_checksum_cache = bb.checksum.FileChecksumCache()
@@ -1279,6 +1281,7 @@ class FetchData(object):
if not self.pswd and "pswd" in self.parm:
self.pswd = self.parm["pswd"]
self.setup = False
+ self.destdir = None
def configure_checksum(checksum_id):
if "name" in self.parm:
@@ -1557,6 +1560,8 @@ class FetchMethod(object):
bb.utils.mkdirhier(unpackdir)
else:
unpackdir = rootdir
+ urldata.destdir = unpackdir
+ urldata.is_unpacked_archive = unpack and cmd
if not unpack or not cmd:
# If file == dest, then avoid any copies, as we already put the file into dest!
@@ -1572,6 +1577,7 @@ class FetchMethod(object):
if urlpath.find("/") != -1:
destdir = urlpath.rsplit("/", 1)[0] + '/'
bb.utils.mkdirhier("%s/%s" % (unpackdir, destdir))
+ urldata.destdir = "%s/%s" % (unpackdir, destdir)
cmd = 'cp -fpPRH "%s" "%s"' % (file, destdir)
if not cmd:
@@ -1855,26 +1861,69 @@ class Fetch(object):
if not ret:
raise FetchError("URL %s doesn't work" % u, u)
-    def unpack(self, root, urls=None):
+    def unpack(self, root, urls=None, is_module=False, checkout_destdir=None):
         """
-        Unpack urls to root
+        Unpack urls to a tmp dir, trace, and then move everything to root
+
+        is_module needs to be set to true when this method is recursively called
+        by a fetcher's unpack method to unpack (sub)modules (gitsm, npmsw)
+
+        checkout_destdir needs to be passed when this method is recursively
+        called by gitsm fetcher
         """
         if not urls:
             urls = self.urls
+        if is_module:
+            destdir = root
+        else:
+            trace = TraceUnpackBase(root, self.d)
+            destdir = trace.tmpdir
+        realroot = os.path.realpath(root)
         for u in urls:
             ud = self.ud[u]
+            # absolute subdir, destsuffix and subpath params located under root
+            # wouldn't work when unpacking in the tmp dir, make them relative
+            # (commonpath, unlike startswith, rejects siblings like root + "-x")
+            for p in ('subdir', 'destsuffix', 'subpath'):
+                if not ud.parm.get(p):
+                    continue
+                if os.path.isabs(ud.parm[p]):
+                    realpath = os.path.realpath(ud.parm[p])
+                    if os.path.commonpath((realroot, realpath)) == realroot:
+                        ud.parm[p] = os.path.relpath(realpath, realroot)
             ud.setup_localpath(self.d)
+            ud.rootdir = root
+
+            if hasattr(ud, "checkout_destdir"):
+                ud.checkout_destdir = checkout_destdir
             if ud.lockfile:
                 lf = bb.utils.lockfile(ud.lockfile)
-            ud.method.unpack(ud, root, self.d)
+            ud.method.unpack(ud, destdir, self.d)
             if ud.lockfile:
                 bb.utils.unlockfile(lf)
+            if is_module:
+                continue
+
+            if hasattr(ud, "nocheckout") and ud.nocheckout:
+                logger.warning(
+                    "Can't trace sources for"
+                    " %s because repo has not been checked out" % u)
+            else:
+                trace.commit(u, ud)
+
+            trace.move2root()
+
+        if is_module:
+            return
+        trace.write_data()
+        trace.close()
+
def clean(self, urls=None):
"""
Clean files that the fetcher gets or places
@@ -101,8 +101,10 @@ class Crate(Wget):
bp = d.getVar('BP')
if bp == ud.parm.get('name'):
cmd = "tar -xz --no-same-owner -f %s" % thefile
+ ud.destdir = rootdir
else:
cargo_bitbake = self._cargo_bitbake_path(rootdir)
+ ud.destdir = cargo_bitbake
cmd = "tar -xz --no-same-owner -f %s -C %s" % (thefile, cargo_bitbake)
@@ -34,6 +34,11 @@ class GitSM(Git):
"""
return ud.type in ['gitsm']
+    def urldata_init(self, ud, d):
+        super().urldata_init(ud, d)  # run the parent Git initialization first
+        ud.checkout_destdir = None  # set later by Fetch.unpack()/GitSM.unpack()
+        ud.module_data = []  # submodule records appended by process_submodules()
+
def process_submodules(self, ud, workdir, function, d):
"""
Iterate over all of the submodules in this repository and execute
@@ -144,6 +149,15 @@ class GitSM(Git):
function(ud, url, module, paths[module], workdir, ld)
+ if function.__name__ == "unpack_submodules":
+ destdir = os.path.join(ud.checkout_destdir, paths[module])
+ ud.module_data.append({
+ "url": url,
+ "destdir": destdir.rstrip("/"),
+ "parent_destdir": ud.checkout_destdir.rstrip("/"),
+ "revision": subrevision[module]
+ })
+
return submodules != []
def need_update(self, ud, d):
@@ -215,9 +229,13 @@ class GitSM(Git):
else:
repo_conf = os.path.join(ud.destdir, '.git')
+ checkout_destdir = os.path.join(ud.checkout_destdir, modpath)
+
try:
newfetch = Fetch([url], d, cache=False)
- newfetch.unpack(root=os.path.dirname(os.path.join(repo_conf, 'modules', module)))
+ newfetch.unpack(root=os.path.dirname(os.path.join(repo_conf, 'modules', module)), is_module=True, checkout_destdir=checkout_destdir)
+ # add nested submodules' data
+ ud.module_data += newfetch.ud[url].module_data
except Exception as e:
logger.error('gitsm: submodule unpack failed: %s %s' % (type(e).__name__, str(e)))
raise
@@ -239,6 +257,10 @@ class GitSM(Git):
Git.unpack(self, ud, destdir, d)
+ if not ud.checkout_destdir:
+ # for main git repo, checkout destdir corresponds with unpack destdir
+ ud.checkout_destdir = ud.destdir
+
ret = self.process_submodules(ud, ud.destdir, unpack_submodules, d)
if not ud.bareclone and ret:
@@ -242,6 +242,7 @@ class Hg(FetchMethod):
revflag = "-r %s" % ud.revision
subdir = ud.parm.get("destsuffix", ud.module)
codir = "%s/%s" % (destdir, subdir)
+ ud.destdir = codir
scmdata = ud.parm.get("scmdata", "")
if scmdata != "nokeep":
@@ -298,6 +298,7 @@ class Npm(FetchMethod):
destsuffix = ud.parm.get("destsuffix", "npm")
destdir = os.path.join(rootdir, destsuffix)
npm_unpack(ud.localpath, destdir, d)
+ ud.destdir = destdir
def clean(self, ud, d):
"""Clean any existing full or partial download"""
@@ -80,6 +80,9 @@ class NpmShrinkWrap(FetchMethod):
def urldata_init(self, ud, d):
"""Init npmsw specific variables within url data"""
+ # initialize module_data (for module source tracing)
+ ud.module_data = []
+
# Get the 'shrinkwrap' parameter
ud.shrinkwrap_file = re.sub(r"^npmsw://", "", ud.url.split(";")[0])
@@ -192,6 +195,7 @@ class NpmShrinkWrap(FetchMethod):
raise ParameterError("Unsupported dependency: %s" % name, ud.url)
ud.deps.append({
+ "name": name,
"url": url,
"localpath": localpath,
"extrapaths": extrapaths,
@@ -266,20 +270,31 @@ class NpmShrinkWrap(FetchMethod):
def unpack(self, ud, rootdir, d):
"""Unpack the downloaded dependencies"""
- destdir = d.getVar("S")
- destsuffix = ud.parm.get("destsuffix")
- if destsuffix:
- destdir = os.path.join(rootdir, destsuffix)
+ # rootdir param is a temporary dir. The real rootdir, where sources are
+ # moved after being traced, is stored in ud.rootdir.
+ destsuffix = ud.parm.get("destsuffix") or os.path.relpath(d.getVar("S"), ud.rootdir)
+ destdir = os.path.join(rootdir, destsuffix)
+ ud.destdir = destdir
bb.utils.mkdirhier(destdir)
bb.utils.copyfile(ud.shrinkwrap_file,
os.path.join(destdir, "npm-shrinkwrap.json"))
+ for dep in ud.deps:
+ dep_destdir = os.path.join(destdir, dep["destsuffix"])
+ dep_parent_destdir = re.sub("/node_modules/"+dep["name"]+"$", "", dep_destdir) # this works also with scoped package names, like @foo/bar
+ ud.module_data.append({
+ "url": dep["url"] or dep["localpath"],
+ "destdir": dep_destdir.rstrip("/"),
+ "parent_destdir": dep_parent_destdir.rstrip("/"),
+ "revision": None
+ })
+
auto = [dep["url"] for dep in ud.deps if not dep["localpath"]]
manual = [dep for dep in ud.deps if dep["localpath"]]
if auto:
- ud.proxy.unpack(destdir, auto)
+ ud.proxy.unpack(destdir, auto, is_module=True)
for dep in manual:
depdestdir = os.path.join(destdir, dep["destsuffix"])
new file mode 100644
@@ -0,0 +1,256 @@
+"""Module implementing a base process for upstream source tracing
+for bb.fetch2.Fetch.unpack()
+
+The process consists of:
+
+- creating a temporary directory where each SRC_URI element is unpacked
+
+- collecting relevant metadata (provenance) for each source file and for every
+ upstream source component, that can be used later on for Software Composition
+ Analysis, SBoM generation, etc.;
+
+- moving everything from the temporary directory to root, and iterate with the
+ next SRC_URI element;
+
+- saving metadata in a json file after all elements have been processed.
+
+It assumes that:
+
+- fetchers store unpack destination dir in urldata.destdir;
+- gitsm and npmsw fetchers store module metadata in urldata.module_data, as a
+ list of dict elements in the following format:
+ [
+ {
+ "url": "<module url>",
+ "destdir": "<module destination path>",
+ "parent_destdir": "<parent module destination path>",
+ "revision": "<git submodule revision (only for gitsm, else None)>"
+ }, ...
+ ]
+- urldata.is_unpacked_archive (boolean) is set to True or False for "file"
+ SRC_URI entries.
+"""
+
+# Copyright (C) 2023 Alberto Pianon <pianon@array.eu>
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+
+import os
+import json
+import tempfile
+
+import bb.utils
+import bb.compress.zstd
+
+class TraceException(Exception):
+    """Error raised by the upstream source tracing helpers of this module."""
+
+def scandir(path):
+    with os.scandir(path) as entries:  # iterator is closed on exit
+        return {entry.name: entry for entry in entries}  # name -> os.DirEntry
+
+def is_real_dir(e):
+    return e.is_dir(follow_symlinks=False)  # a directory, not a link to one
+
+def is_real_and_nonempty_dir(e):
+    return scandir(e.path) if is_real_dir(e) else False  # truthy iff non-empty
+
+def is_file_or_symlink(e):
+    return e.is_symlink() or e.is_file()  # dangling links count as well
+
+def is_git_dir(e):
+    """True if e.path has a '.git' entry and 'git rev-parse' succeeds there."""
+    if ".git" not in scandir(e.path):
+        return False
+    try:
+        bb.process.run(
+            ["git", "rev-parse", "--is-inside-work-tree"], cwd=e.path)
+    except bb.process.ExecutionError:
+        return False
+    return True
+
+def check_is_real_dir(path, name):
+    """Raise TraceException unless path is an existing, non-symlink dir."""
+    if os.path.islink(path) or not os.path.isdir(path):
+        raise TraceException("%s path %s is not a directory" % (name, path))
+
+def move_contents(src_dir, dst_dir):
+    """Move and merge contents from src_dir to dst_dir
+
+    Conflict resolution criteria are explained in bb.tests.trace_base
+
+    It's optimized for fast execution time by using os.scandir and os.rename, so
+    it requires that both src_dir and dst_dir reside in the same filesystem.
+    Raises TraceException if that does not hold or a conflict can't be solved.
+    """
+    import errno  # local import: the module header does not provide it
+    check_is_real_dir(src_dir, "Source")
+    check_is_real_dir(dst_dir, "Destination")
+    if os.lstat(src_dir).st_dev != os.lstat(dst_dir).st_dev:
+        raise TraceException(
+            "Source %s and destination %s must be in the same filesystem" %
+            (src_dir, dst_dir)
+        )
+
+    src_scandir = scandir(src_dir)
+    dst_scandir = scandir(dst_dir)
+
+    for src_name, src in src_scandir.items():
+        dst = dst_scandir.get(src_name)
+        if dst:
+            # handle conflicts
+            if is_real_dir(src) and is_real_and_nonempty_dir(dst):
+                if is_git_dir(src):
+                    bb.utils.prunedir(dst.path)
+                else:
+                    move_contents(src.path, dst.path)
+                    os.rmdir(src.path)
+                    continue
+            elif is_real_dir(src) and is_file_or_symlink(dst):
+                os.remove(dst.path)
+            elif is_file_or_symlink(src) and is_real_dir(dst):
+                try:
+                    os.rmdir(dst.path)
+                except OSError as e:
+                    # POSIX lets rmdir() report either code for a non-empty dir
+                    if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
+                        raise
+                    raise TraceException(
+                        "Error while moving %s contents to %s, cannot move"
+                        " %s to %s: source is a file or a symlink, while"
+                        " destination is a non-empty directory."
+                        % (src_dir, dst_dir, src.path, dst.path)
+                    ) from e
+        dst_path = dst.path if dst else os.path.join(dst_dir, src_name)
+        os.rename(src.path, dst_path)
+
+def findall_files_and_links(path, exclude=(), skip_git_submodules=False):
+    """Recursively find all files and links in path, skipping entries whose
+    name is in exclude and, if skip_git_submodules is True, any git dirs.
+
+    Returns tuple of sorted lists of file and link paths (sorting is for
+    reproducibility in tests)
+    """
+    files = []
+    links = []
+    with os.scandir(path) as scan:
+        for e in scan:
+            if e.name in exclude:
+                continue
+            if e.is_symlink():  # links are recorded, never followed
+                links.append(e.path)
+            elif e.is_file():
+                files.append(e.path)
+            elif e.is_dir():
+                if skip_git_submodules and is_git_dir(e):
+                    continue
+                _files, _links = findall_files_and_links(
+                    e.path, exclude, skip_git_submodules)
+                files += _files
+                links += _links
+    return sorted(files), sorted(links)
+
+class TraceUnpackBase:
+    """base class for implementing a process for upstream source tracing
+    See this module's help for more details on the process.
+
+    This base class implements the process but does not collect any data. It is
+    intended to be subclassed in a separate 'trace' module, implementing
+    _collect_data() and _process_data() methods.
+
+    Method call order:
+        - __init__(): initialize tmpdir and td (trace data)
+        - for each SRC_URI entry unpack:
+          - commit(): go through all files in tmpdir (and in each module subdir
+            in case of gitsm and npmsw fetchers) and commit collected metadata
+            to td
+          - move2root(): moves all files from tmpdir to root
+        - write_data()
+        - close(): delete tmpdir and cache
+    """
+
+    def __init__(self, root, d):
+        """initialize properties and create temporary directory in root
+
+        Temporary unpack dir is created in 'root' to ensure they are in the
+        same filesystem, so files can be quickly moved to 'root' after tracing
+        """
+
+        self.root = root
+        self.d = d
+        self.td = {}
+        if not os.path.exists(root):
+            bb.utils.mkdirhier(root)
+        self.tmpdir = tempfile.mkdtemp(dir=root)
+
+    def commit(self, u, ud):
+        """go through all files in tmpdir and commit collected metadata to td.
+        dive into module subdirs in case of gitsm and npmsw fetchers
+
+        Params are:
+        - u -> str: src uri of the upstream repo/package that is being processed
+        - ud -> bb.fetch2.FetchData: src uri fetch data object; ud.url and u do not correspond when git/npm modules are being processed, so we need both
+        Raises TraceException if the fetcher left ud.destdir unset.
+        """
+        # os.scandir(None) would silently scan the current working directory
+        if not ud.destdir:
+            raise TraceException("unpack destdir is not set for %s" % u)
+
+        exclude = ['.git', '.hg', '.svn']
+
+        # exclude node_modules subdirs (will be separately parsed)
+        if ud.type in ['npm', 'npmsw']:
+            exclude.append('node_modules')
+        # exclude git submodules (will be separately parsed)
+        skip_git_submodules = (ud.type == 'gitsm')
+
+        files, links = findall_files_and_links(
+            ud.destdir, exclude, skip_git_submodules)
+        self._collect_data(u, ud, files, links, ud.destdir)
+
+        if ud.type in ['gitsm', 'npmsw'] and ud.module_data:
+            self._process_module_data(ud)
+            for md in ud.module_data:
+                files, links = findall_files_and_links(
+                    md["destdir"], exclude, skip_git_submodules)
+                self._collect_data(
+                    md["url"], ud, files, links, md["destdir"], md)
+
+    def _process_module_data(self, ud):
+        """add parent module data to each module data item, to map dependencies
+        """
+        revision = ud.revisions[ud.names[0]] if ud.type == 'gitsm' else None
+        indexed_md = { md["destdir"]: md for md in ud.module_data }
+        # add main git repo (gitsm) or npm-shrinkwrap.json (npmsw)
+        indexed_md[ud.destdir.rstrip("/")] = {"url": ud.url, "revision": revision}
+        for md in ud.module_data:
+            md["parent_md"] = indexed_md[md["parent_destdir"]]
+
+    def move2root(self):
+        """move all files from temporary directory to root"""
+        move_contents(self.tmpdir, self.root)
+
+    def write_data(self):
+        """post-process td and, if PN is set, dump it as zstd-compressed json"""
+        self._process_data()
+        if not self.d.getVar("PN"):
+            return
+        if not os.path.exists("%s/temp" % self.root):
+            bb.utils.mkdirhier("%s/temp" % self.root)
+        path = "%s/temp/%s-%s.unpack.trace.json.zst" % (
+            self.root, self.d.getVar("PN"), self.d.getVar("PV"))
+        with bb.compress.zstd.open(path, "wt", encoding="utf-8") as f:
+            json.dump(self.td, f)
+            f.flush()
+
+    def close(self):
+        """remove the temporary directory (os.rmdir: must be empty), drop td"""
+        os.rmdir(self.tmpdir)
+        del self.td
+
+    def _collect_data(self, u, ud, files, links, destdir, md=None):
+        """collect provenance metadata on the committed files. Not implemented"""
+
+    def _process_data(self):
+        """post-process self.td. Not implemented"""
\ No newline at end of file
new file mode 100644
@@ -0,0 +1,227 @@
+
+# Copyright (C) 2023 Alberto Pianon <pianon@array.eu>
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+
+import os
+import re
+import unittest
+import tempfile
+from pathlib import Path
+import subprocess
+
+import bb
+
+def create_src_dst(tmpdir):
+    """Create empty 'src' and 'dst' dirs in tmpdir, return them as Paths."""
+    src_dir, dst_dir = Path(tmpdir, "src"), Path(tmpdir, "dst")
+    for new_dir in (src_dir, dst_dir):
+        os.makedirs(new_dir)
+    return src_dir, dst_dir
+
+def make_dirname(path):
+    parent = os.path.dirname(path)  # '' when path has no directory part
+    if parent:
+        os.makedirs(parent, exist_ok=True)  # like mkdir -p
+
+def create_file(path, content):
+    """Write content to path, creating missing parent directories first."""
+    make_dirname(path)
+    Path(path).write_text(content)
+
+def create_link(path, target):
+    make_dirname(path)  # parent directories are created on demand
+    Path(path).symlink_to(target)  # 'path' becomes a link to 'target'
+
+def get_tree(path):
+    """Return the sorted relative paths of all entries found below path.
+
+    Walks path directly instead of chdir()-ing into it, so the process-wide
+    working directory is never changed (and cannot be left changed on error).
+    """
+    return sorted(
+        os.path.relpath(os.path.join(root, f), path)
+        for root, dirs, files in os.walk(path) for f in dirs + files)
+
+def read_file(path):
+    """Return the whole text content of the file at path."""
+    return Path(path).read_text()
+
+class MoveContentsTest(unittest.TestCase):
+    """
+    Test the following conflict resolution criteria:
+
+    - if a file (or symlink) exists both in src_dir and in dst_dir, the
+      file/symlink in dst_dir will be overwritten;
+
+    - if a subdirectory exists both in src_dir and in dst_dir, their contents
+      will be merged, and in case of file/symlink conflicts, files/symlinks in
+      dst_dir will be overwritten - unless src_dir is a git repo; in such a
+      case, dst_dir will be pruned and src_dir will be moved to dst_dir, for
+      consistency with bb.fetch2.git.Git.unpack method's behavior (which prunes
+      clone dir if already existing, before cloning)
+
+    - if the same relative path exists both in src_dir and in dst_dir, but the
+      path in src_dir is a directory and the path in dst_dir is a file/symlink,
+      the latter will be overwritten;
+
+    - if instead the path in src_dir is a file and the path in dst_dir is a
+      directory, the latter will be overwritten only if it is empty, otherwise
+      an exception will be raised.
+    """
+
+    def test_dir_merge_and_file_overwrite(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            create_file(src_dir / "dir/subdir/file.txt", "new")
+            create_file(dst_dir / "dir/subdir/file.txt", "old")
+            create_file(dst_dir / "dir/subdir/file1.txt", "old")
+            bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+            expected_dst_tree = [
+                "dir",
+                "dir/subdir",
+                "dir/subdir/file.txt",
+                "dir/subdir/file1.txt"
+            ]
+            self.assertEqual(get_tree(src_dir), [])
+            self.assertEqual(get_tree(dst_dir), expected_dst_tree)
+            self.assertEqual(read_file(dst_dir / "dir/subdir/file.txt"), "new")
+            self.assertEqual(read_file(dst_dir / "dir/subdir/file1.txt"), "old")
+
+    def test_file_vs_symlink_conflicts(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            # entries to be moved
+            create_file(src_dir / "dir/subdir/fileA.txt", "new")
+            create_file(src_dir / "dir/fileB.txt", "new")
+            create_link(src_dir / "file.txt", "dir/subdir/fileA.txt")
+            # conflicting entries already in place (file <-> symlink swapped)
+            create_file(dst_dir / "dir/subdir/fileA.txt", "old")
+            create_link(dst_dir / "dir/fileB.txt", "subdir/fileA.txt")
+            create_file(dst_dir / "file.txt", "old")
+
+            bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+            self.assertEqual(get_tree(src_dir), [])
+            self.assertTrue(os.path.islink(dst_dir / "file.txt"))
+            self.assertEqual(
+                os.readlink(dst_dir / "file.txt"),
+                "dir/subdir/fileA.txt"
+            )
+            self.assertFalse(os.path.islink(dst_dir / "dir/fileB.txt"))
+            self.assertEqual(read_file(dst_dir / "dir/fileB.txt"), "new")
+
+    def test_dir_vs_file_conflict(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            create_file(src_dir / "items/item0/content.txt", "hello")
+            create_file(dst_dir / "items/item0", "there")
+            bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+            self.assertEqual(get_tree(src_dir), [])
+            self.assertTrue(os.path.isdir(dst_dir / "items/item0"))
+            self.assertEqual(
+                read_file(dst_dir / "items/item0/content.txt"), "hello")
+
+    def test_dir_vs_symlink_conflict(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            create_file(src_dir / "items/item0/content.txt", "hello")
+            create_file(dst_dir / "items/item1/content.txt", "there")
+            create_link(dst_dir / "items/item0", "item1")
+            bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+            self.assertEqual(get_tree(src_dir), [])
+            self.assertFalse(os.path.islink(dst_dir / "items/item0"))
+            self.assertEqual(
+                read_file(dst_dir / "items/item0/content.txt"), "hello")
+            self.assertEqual(
+                read_file(dst_dir / "items/item1/content.txt"), "there")
+
+    def test_symlink_vs_empty_dir_conflict(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            create_file(src_dir / "items/item1/content.txt", "there")
+            create_link(src_dir / "items/item0", "item1")
+            os.makedirs(dst_dir / "items/item0")
+            bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+            self.assertEqual(get_tree(src_dir), [])
+            self.assertTrue(os.path.islink(dst_dir / "items/item0"))
+            self.assertEqual(read_file(dst_dir / "items/item0/content.txt"), "there")
+
+    def test_symlink_vs_nonempty_dir_conflict(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            create_file(src_dir / "items/item1/content.txt", "there")
+            create_link(src_dir / "items/item0", "item1")
+            create_file(dst_dir / "items/item0/content.txt", "hello")
+            with self.assertRaises(bb.fetch2.trace_base.TraceException):
+                bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+
+    def test_file_vs_empty_dir_conflict(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            create_file(src_dir / "items/item0", "test")
+            os.makedirs(dst_dir / "items/item0")
+            bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+            self.assertEqual(get_tree(src_dir), [])
+            self.assertTrue(os.path.isfile(dst_dir / "items/item0"))
+
+    def test_file_vs_nonempty_dir_conflict(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            create_file(src_dir / "items/item0", "test")
+            create_file(dst_dir / "items/item0/content.txt", "test")
+            with self.assertRaises(bb.fetch2.trace_base.TraceException):
+                bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+
+    def test_git_dir(self):
+        with tempfile.TemporaryDirectory() as workdir:
+            src_dir, dst_dir = create_src_dst(workdir)
+            git_repo = src_dir / "src/my_git_repo"
+            create_file(git_repo / "foo.txt", "hello")
+            subprocess.check_output(["git", "init"], cwd=git_repo)
+            create_file(dst_dir / "src/my_git_repo/content.txt", "there")
+            bb.fetch2.trace_base.move_contents(src_dir, dst_dir)
+            # git clone dir should be pruned if already existing
+            self.assertFalse(
+                os.path.exists(dst_dir / "src/my_git_repo/content.txt"))
+            self.assertEqual(
+                read_file(dst_dir / "src/my_git_repo/foo.txt"), "hello")
+            self.assertTrue(os.path.isdir(dst_dir / "src/my_git_repo/.git"))
+
+
+class FindAllFilesAndLinksTest(unittest.TestCase):
+    """test if all files and links are correctly returned, and if specific
+    file/dir names and git subdirs are correctly excluded"""
+
+    def test_findall_files_and_links(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            tmpdir = Path(tmpdir)
+            files = {
+                str(tmpdir/"foo/example/example.txt"): "example",
+                str(tmpdir/"foo/foo.txt"): "foo",
+                str(tmpdir/"foo/foo2.txt"): "foo2",
+                str(tmpdir/"README"): "hello",
+            }
+            ignored = {
+                str(tmpdir/".git"): "fake",
+                str(tmpdir/"foo2/dummy"): "dummy"
+            }
+            # '.git' is excluded by name, 'foo2' because it becomes a git repo
+            allfiles = {**files, **ignored}
+            links = {
+                str(tmpdir/"example"): "foo/example", # link to dir
+                str(tmpdir/"example.txt"): "foo/example/example.txt", # link to file
+            }
+            for path, content in allfiles.items():
+                create_file(path, content)
+            for path, target in links.items():
+                create_link(path, target)
+            subprocess.check_output(["git", "init"], cwd=tmpdir/"foo2")
+            res_files, res_links = bb.fetch2.trace_base.findall_files_and_links(
+                tmpdir, exclude=['.git'], skip_git_submodules=True)
+            self.assertEqual(res_files, sorted(files))
+            self.assertEqual(res_links, sorted(links))
+
+
+if __name__ == '__main__':
+ unittest.main()