Message ID | 20250129112406.1660522-2-stefan-koch@siemens.com |
---|---|
State | New |
Headers | show |
Series | fetch2/git: Improve shallow, lfs, and tag support | expand |
This, and the other patches have no tests via bitbake-selftest. Can you look into adding some? Alex On Wed, 29 Jan 2025 at 12:24, Koch, Stefan via lists.openembedded.org <stefan-koch=siemens.com@lists.openembedded.org> wrote: > > When `ud.shallow == 1`: > - Prefer an initial shallow clone over an initial bare clone, > while still utilizing any already existing bare clones. > > This improves: > - Solves timeout issues during initial clones on slow internet connections > by reducing the amount of data transferred. > - Eliminates the need to use a HTTPS tarball SRC_URI > to reduce data transfer. > - Allows SSH-based authentication (e.g. cert and agent-based) when > using non-public repos, so additional HTTPS tokens may not be required. > > Signed-off-by: Stefan Koch <stefan-koch@siemens.com> > --- > lib/bb/fetch2/git.py | 92 ++++++++++++++++++++++++++++++++++---------- > 1 file changed, 71 insertions(+), 21 deletions(-) > > diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py > index 6badda597..6d87c2f18 100644 > --- a/lib/bb/fetch2/git.py > +++ b/lib/bb/fetch2/git.py > @@ -366,6 +366,33 @@ class Git(FetchMethod): > def tarball_need_update(self, ud): > return ud.write_tarballs and not os.path.exists(ud.fullmirror) > > + # Helper method for fetching Git LFS data > + def lfs_fetch(self, ud, d, clonedir, revision, progresshandler, fetchall=False): > + try: > + if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and self._find_git_lfs(d) and len(revision): > + # Using worktree with the revision because .lfsconfig may exists > + worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision) > + runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir) > + lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "") > + runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt")) > + worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd > + runfetchcmd(worktree_rem_cmd, d, log=progresshandler, 
workdir=clonedir) > + except: > + logger.warning("Fetching LFS did not succeed.") > + > + # Create as a temp file and move atomically into position to avoid races > + @contextmanager > + def create_atomic(self, filename): > + fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) > + try: > + yield tfile > + umask = os.umask(0o666) > + os.umask(umask) > + os.chmod(tfile, (0o666 & ~umask)) > + os.rename(tfile, filename) > + finally: > + os.close(fd) > + > def try_premirror(self, ud, d): > # If we don't do this, updating an existing checkout with only premirrors > # is not possible > @@ -446,7 +473,40 @@ class Git(FetchMethod): > if ud.proto.lower() != 'file': > bb.fetch2.check_network_access(d, clone_cmd, ud.url) > progresshandler = GitProgressHandler(d) > - runfetchcmd(clone_cmd, d, log=progresshandler) > + > + # When ud.shallow is enabled: > + # Try creating an initial shallow clone > + shallowstate = False > + if ud.shallow: > + tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) > + shallowclone = os.path.join(tempdir, 'git') > + try: > + self.clone_shallow_local(ud, shallowclone, d) > + shallowstate = True > + except: > + logger.warning("Creating initial shallow clone failed, try regular clone now.") > + > + # When the shallow clone has succeeded: > + # Create shallow tarball > + if shallowstate: > + logger.info("Creating tarball of git repository") > + with self.create_atomic(ud.fullshallow) as tfile: > + runfetchcmd("tar -czf %s ." 
% tfile, d, workdir=shallowclone) > + runfetchcmd("touch %s.done" % ud.fullshallow, d) > + > + # Always cleanup tempdir > + bb.utils.remove(tempdir, recurse=True) > + > + # When the shallow clone has succeeded: > + # Use shallow tarball > + if shallowstate: > + ud.localpath = ud.fullshallow > + return > + > + # When ud.shallow is disabled or the shallow clone failed: > + # Create an initial regular clone > + if not shallowstate: > + runfetchcmd(clone_cmd, d, log=progresshandler) > > # Update the checkout if needed > if self.clonedir_need_update(ud, d): > @@ -509,20 +569,6 @@ class Git(FetchMethod): > runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir) > > def build_mirror_data(self, ud, d): > - > - # Create as a temp file and move atomically into position to avoid races > - @contextmanager > - def create_atomic(filename): > - fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) > - try: > - yield tfile > - umask = os.umask(0o666) > - os.umask(umask) > - os.chmod(tfile, (0o666 & ~umask)) > - os.rename(tfile, filename) > - finally: > - os.close(fd) > - > if ud.shallow and ud.write_shallow_tarballs: > if not os.path.exists(ud.fullshallow): > if os.path.islink(ud.fullshallow): > @@ -533,7 +579,7 @@ class Git(FetchMethod): > self.clone_shallow_local(ud, shallowclone, d) > > logger.info("Creating tarball of git repository") > - with create_atomic(ud.fullshallow) as tfile: > + with self.create_atomic(ud.fullshallow) as tfile: > runfetchcmd("tar -czf %s ." 
% tfile, d, workdir=shallowclone) > runfetchcmd("touch %s.done" % ud.fullshallow, d) > finally: > @@ -543,7 +589,7 @@ class Git(FetchMethod): > os.unlink(ud.fullmirror) > > logger.info("Creating tarball of git repository") > - with create_atomic(ud.fullmirror) as tfile: > + with self.create_atomic(ud.fullmirror) as tfile: > mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d, > quiet=True, workdir=ud.clonedir) > runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ." > @@ -557,12 +603,15 @@ class Git(FetchMethod): > - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev > """ > > + progresshandler = GitProgressHandler(d) > + repourl = self._get_repo_url(ud) > bb.utils.mkdirhier(dest) > init_cmd = "%s init -q" % ud.basecmd > if ud.bareclone: > init_cmd += " --bare" > runfetchcmd(init_cmd, d, workdir=dest) > - runfetchcmd("%s remote add origin %s" % (ud.basecmd, ud.clonedir), d, workdir=dest) > + # Use repourl when creating the initial shallow clone > + runfetchcmd("%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl) if ud.shallow and not os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest) > > # Check the histories which should be excluded > shallow_exclude = '' > @@ -600,10 +649,12 @@ class Git(FetchMethod): > # The ud.clonedir is a local temporary dir, will be removed when > # fetch is done, so we can do anything on it. 
> adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision) > - runfetchcmd(adv_cmd, d, workdir=ud.clonedir) > + if not ud.shallow: > + runfetchcmd(adv_cmd, d, workdir=ud.clonedir) > > - runfetchcmd(fetch_cmd, d, workdir=dest) > + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=dest) > runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) > + self.lfs_fetch(ud, d, dest, ud.revisions[ud.names[0]], progresshandler) > > # Apply extra ref wildcards > all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \ > @@ -629,7 +680,6 @@ class Git(FetchMethod): > runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) > > # The url is local ud.clonedir, set it to upstream one > - repourl = self._get_repo_url(ud) > runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest) > > def unpack(self, ud, destdir, d): > -- > 2.39.5 > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#17109): https://lists.openembedded.org/g/bitbake-devel/message/17109 > Mute This Topic: https://lists.openembedded.org/mt/110876221/1686489 > Group Owner: bitbake-devel+owner@lists.openembedded.org > Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [alex.kanavin@gmail.com] > -=-=-=-=-=-=-=-=-=-=-=- >
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py index 6badda597..6d87c2f18 100644 --- a/lib/bb/fetch2/git.py +++ b/lib/bb/fetch2/git.py @@ -366,6 +366,33 @@ class Git(FetchMethod): def tarball_need_update(self, ud): return ud.write_tarballs and not os.path.exists(ud.fullmirror) + # Helper method for fetching Git LFS data + def lfs_fetch(self, ud, d, clonedir, revision, progresshandler, fetchall=False): + try: + if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and self._find_git_lfs(d) and len(revision): + # Using worktree with the revision because .lfsconfig may exists + worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision) + runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir) + lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "") + runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt")) + worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd + runfetchcmd(worktree_rem_cmd, d, log=progresshandler, workdir=clonedir) + except: + logger.warning("Fetching LFS did not succeed.") + + # Create as a temp file and move atomically into position to avoid races + @contextmanager + def create_atomic(self, filename): + fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) + try: + yield tfile + umask = os.umask(0o666) + os.umask(umask) + os.chmod(tfile, (0o666 & ~umask)) + os.rename(tfile, filename) + finally: + os.close(fd) + def try_premirror(self, ud, d): # If we don't do this, updating an existing checkout with only premirrors # is not possible @@ -446,7 +473,40 @@ class Git(FetchMethod): if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, clone_cmd, ud.url) progresshandler = GitProgressHandler(d) - runfetchcmd(clone_cmd, d, log=progresshandler) + + # When ud.shallow is enabled: + # Try creating an initial shallow clone + shallowstate = False + if ud.shallow: + tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) + shallowclone = os.path.join(tempdir, 
'git') + try: + self.clone_shallow_local(ud, shallowclone, d) + shallowstate = True + except: + logger.warning("Creating initial shallow clone failed, try regular clone now.") + + # When the shallow clone has succeeded: + # Create shallow tarball + if shallowstate: + logger.info("Creating tarball of git repository") + with self.create_atomic(ud.fullshallow) as tfile: + runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone) + runfetchcmd("touch %s.done" % ud.fullshallow, d) + + # Always cleanup tempdir + bb.utils.remove(tempdir, recurse=True) + + # When the shallow clone has succeeded: + # Use shallow tarball + if shallowstate: + ud.localpath = ud.fullshallow + return + + # When ud.shallow is disabled or the shallow clone failed: + # Create an initial regular clone + if not shallowstate: + runfetchcmd(clone_cmd, d, log=progresshandler) # Update the checkout if needed if self.clonedir_need_update(ud, d): @@ -509,20 +569,6 @@ class Git(FetchMethod): runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir) def build_mirror_data(self, ud, d): - - # Create as a temp file and move atomically into position to avoid races - @contextmanager - def create_atomic(filename): - fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) - try: - yield tfile - umask = os.umask(0o666) - os.umask(umask) - os.chmod(tfile, (0o666 & ~umask)) - os.rename(tfile, filename) - finally: - os.close(fd) - if ud.shallow and ud.write_shallow_tarballs: if not os.path.exists(ud.fullshallow): if os.path.islink(ud.fullshallow): @@ -533,7 +579,7 @@ class Git(FetchMethod): self.clone_shallow_local(ud, shallowclone, d) logger.info("Creating tarball of git repository") - with create_atomic(ud.fullshallow) as tfile: + with self.create_atomic(ud.fullshallow) as tfile: runfetchcmd("tar -czf %s ." 
% tfile, d, workdir=shallowclone) runfetchcmd("touch %s.done" % ud.fullshallow, d) finally: @@ -543,7 +589,7 @@ class Git(FetchMethod): os.unlink(ud.fullmirror) logger.info("Creating tarball of git repository") - with create_atomic(ud.fullmirror) as tfile: + with self.create_atomic(ud.fullmirror) as tfile: mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d, quiet=True, workdir=ud.clonedir) runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ." @@ -557,12 +603,15 @@ class Git(FetchMethod): - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev """ + progresshandler = GitProgressHandler(d) + repourl = self._get_repo_url(ud) bb.utils.mkdirhier(dest) init_cmd = "%s init -q" % ud.basecmd if ud.bareclone: init_cmd += " --bare" runfetchcmd(init_cmd, d, workdir=dest) - runfetchcmd("%s remote add origin %s" % (ud.basecmd, ud.clonedir), d, workdir=dest) + # Use repourl when creating the initial shallow clone + runfetchcmd("%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl) if ud.shallow and not os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest) # Check the histories which should be excluded shallow_exclude = '' @@ -600,10 +649,12 @@ class Git(FetchMethod): # The ud.clonedir is a local temporary dir, will be removed when # fetch is done, so we can do anything on it. 
adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision) - runfetchcmd(adv_cmd, d, workdir=ud.clonedir) + if not ud.shallow: + runfetchcmd(adv_cmd, d, workdir=ud.clonedir) - runfetchcmd(fetch_cmd, d, workdir=dest) + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=dest) runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) + self.lfs_fetch(ud, d, dest, ud.revisions[ud.names[0]], progresshandler) # Apply extra ref wildcards all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \ @@ -629,7 +680,6 @@ class Git(FetchMethod): runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) # The url is local ud.clonedir, set it to upstream one - repourl = self._get_repo_url(ud) runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest) def unpack(self, ud, destdir, d):
When `ud.shallow == 1`:
- Prefer an initial shallow clone over an initial bare clone, while still utilizing any already existing bare clones.

This brings the following improvements:
- Solves timeout issues during initial clones on slow internet connections by reducing the amount of data transferred.
- Eliminates the need to use an HTTPS tarball SRC_URI to reduce data transfer.
- Allows SSH-based authentication (e.g. cert and agent-based) when using non-public repos, so additional HTTPS tokens may not be required.

Signed-off-by: Stefan Koch <stefan-koch@siemens.com>
---
lib/bb/fetch2/git.py | 92 ++++++++++++++++++++++++++++++++++----------
1 file changed, 71 insertions(+), 21 deletions(-)