| Message ID | 20250129112406.1660522-2-stefan-koch@siemens.com |
|---|---|
| State | Accepted, archived |
| Commit | 457288b2fda86fd00cdcaefac616129b0029e1f9 |
| Headers | show |
| Series | fetch2/git: Improve shallow, lfs, and tag support | expand |
This, and the other patches have no tests via bitbake-selftest. Can you look into adding some? Alex On Wed, 29 Jan 2025 at 12:24, Koch, Stefan via lists.openembedded.org <stefan-koch=siemens.com@lists.openembedded.org> wrote: > > When `ud.shallow == 1`: > - Prefer an initial shallow clone over an initial bare clone, > while still utilizing any already existing bare clones. > > This improves: > - Solves timeout issues during initial clones on slow internet connections > by reducing the amount of data transferred. > - Eliminates the need to use a HTTPS tarball SRC_URI > to reduce data transfer. > - Allows SSH-based authentication (e.g. cert and agent-based) when > using non-public repos, so additional HTTPS tokens may not be required. > > Signed-off-by: Stefan Koch <stefan-koch@siemens.com> > --- > lib/bb/fetch2/git.py | 92 ++++++++++++++++++++++++++++++++++---------- > 1 file changed, 71 insertions(+), 21 deletions(-) > > diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py > index 6badda597..6d87c2f18 100644 > --- a/lib/bb/fetch2/git.py > +++ b/lib/bb/fetch2/git.py > @@ -366,6 +366,33 @@ class Git(FetchMethod): > def tarball_need_update(self, ud): > return ud.write_tarballs and not os.path.exists(ud.fullmirror) > > + # Helper method for fetching Git LFS data > + def lfs_fetch(self, ud, d, clonedir, revision, progresshandler, fetchall=False): > + try: > + if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and self._find_git_lfs(d) and len(revision): > + # Using worktree with the revision because .lfsconfig may exists > + worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision) > + runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir) > + lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "") > + runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt")) > + worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd > + runfetchcmd(worktree_rem_cmd, d, log=progresshandler, 
workdir=clonedir) > + except: > + logger.warning("Fetching LFS did not succeed.") > + > + # Create as a temp file and move atomically into position to avoid races > + @contextmanager > + def create_atomic(self, filename): > + fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) > + try: > + yield tfile > + umask = os.umask(0o666) > + os.umask(umask) > + os.chmod(tfile, (0o666 & ~umask)) > + os.rename(tfile, filename) > + finally: > + os.close(fd) > + > def try_premirror(self, ud, d): > # If we don't do this, updating an existing checkout with only premirrors > # is not possible > @@ -446,7 +473,40 @@ class Git(FetchMethod): > if ud.proto.lower() != 'file': > bb.fetch2.check_network_access(d, clone_cmd, ud.url) > progresshandler = GitProgressHandler(d) > - runfetchcmd(clone_cmd, d, log=progresshandler) > + > + # When ud.shallow is enabled: > + # Try creating an initial shallow clone > + shallowstate = False > + if ud.shallow: > + tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) > + shallowclone = os.path.join(tempdir, 'git') > + try: > + self.clone_shallow_local(ud, shallowclone, d) > + shallowstate = True > + except: > + logger.warning("Creating initial shallow clone failed, try regular clone now.") > + > + # When the shallow clone has succeeded: > + # Create shallow tarball > + if shallowstate: > + logger.info("Creating tarball of git repository") > + with self.create_atomic(ud.fullshallow) as tfile: > + runfetchcmd("tar -czf %s ." 
% tfile, d, workdir=shallowclone) > + runfetchcmd("touch %s.done" % ud.fullshallow, d) > + > + # Always cleanup tempdir > + bb.utils.remove(tempdir, recurse=True) > + > + # When the shallow clone has succeeded: > + # Use shallow tarball > + if shallowstate: > + ud.localpath = ud.fullshallow > + return > + > + # When ud.shallow is disabled or the shallow clone failed: > + # Create an initial regular clone > + if not shallowstate: > + runfetchcmd(clone_cmd, d, log=progresshandler) > > # Update the checkout if needed > if self.clonedir_need_update(ud, d): > @@ -509,20 +569,6 @@ class Git(FetchMethod): > runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir) > > def build_mirror_data(self, ud, d): > - > - # Create as a temp file and move atomically into position to avoid races > - @contextmanager > - def create_atomic(filename): > - fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) > - try: > - yield tfile > - umask = os.umask(0o666) > - os.umask(umask) > - os.chmod(tfile, (0o666 & ~umask)) > - os.rename(tfile, filename) > - finally: > - os.close(fd) > - > if ud.shallow and ud.write_shallow_tarballs: > if not os.path.exists(ud.fullshallow): > if os.path.islink(ud.fullshallow): > @@ -533,7 +579,7 @@ class Git(FetchMethod): > self.clone_shallow_local(ud, shallowclone, d) > > logger.info("Creating tarball of git repository") > - with create_atomic(ud.fullshallow) as tfile: > + with self.create_atomic(ud.fullshallow) as tfile: > runfetchcmd("tar -czf %s ." 
% tfile, d, workdir=shallowclone) > runfetchcmd("touch %s.done" % ud.fullshallow, d) > finally: > @@ -543,7 +589,7 @@ class Git(FetchMethod): > os.unlink(ud.fullmirror) > > logger.info("Creating tarball of git repository") > - with create_atomic(ud.fullmirror) as tfile: > + with self.create_atomic(ud.fullmirror) as tfile: > mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d, > quiet=True, workdir=ud.clonedir) > runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ." > @@ -557,12 +603,15 @@ class Git(FetchMethod): > - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev > """ > > + progresshandler = GitProgressHandler(d) > + repourl = self._get_repo_url(ud) > bb.utils.mkdirhier(dest) > init_cmd = "%s init -q" % ud.basecmd > if ud.bareclone: > init_cmd += " --bare" > runfetchcmd(init_cmd, d, workdir=dest) > - runfetchcmd("%s remote add origin %s" % (ud.basecmd, ud.clonedir), d, workdir=dest) > + # Use repourl when creating the initial shallow clone > + runfetchcmd("%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl) if ud.shallow and not os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest) > > # Check the histories which should be excluded > shallow_exclude = '' > @@ -600,10 +649,12 @@ class Git(FetchMethod): > # The ud.clonedir is a local temporary dir, will be removed when > # fetch is done, so we can do anything on it. 
> adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision) > - runfetchcmd(adv_cmd, d, workdir=ud.clonedir) > + if not ud.shallow: > + runfetchcmd(adv_cmd, d, workdir=ud.clonedir) > > - runfetchcmd(fetch_cmd, d, workdir=dest) > + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=dest) > runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) > + self.lfs_fetch(ud, d, dest, ud.revisions[ud.names[0]], progresshandler) > > # Apply extra ref wildcards > all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \ > @@ -629,7 +680,6 @@ class Git(FetchMethod): > runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) > > # The url is local ud.clonedir, set it to upstream one > - repourl = self._get_repo_url(ud) > runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest) > > def unpack(self, ud, destdir, d): > -- > 2.39.5 > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#17109): https://lists.openembedded.org/g/bitbake-devel/message/17109 > Mute This Topic: https://lists.openembedded.org/mt/110876221/1686489 > Group Owner: bitbake-devel+owner@lists.openembedded.org > Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [alex.kanavin@gmail.com] > -=-=-=-=-=-=-=-=-=-=-=- >
I have run some test-case experiments and added an option to force
the value of ud.shallow by passing forceshallow=True or forceshallow=False.
Without it, the test cases always set ud.shallow to 1 by default.
Commented example from lib/bb/tests/fetch.py:
def test_shallow_clone_preferred_over_shallow(self):
self.add_empty_file('a')
self.add_empty_file('b')
# Fetch once to generate the shallow tarball
- fetcher, ud = self.fetch()
+ fetcher, ud = self.fetch(forceshallow=True) # same as default
behaviour
assert os.path.exists(os.path.join(self.dldir, ud.mirrortarballs[0]))
+ fetcher, ud = self.fetch(forceshallow=False) # needed for test case
(with ud.shallow=1) there is no first clone because of fast shallow
cloning (provided by the patch)
# Fetch and unpack with both the clonedir and shallow tarball
available
- fetcher, ud = self.fetch_and_unpack()
+ fetcher, ud = self.fetch_and_unpack(forceshallow=True) # same as
default behaviour
# The unpacked tree should *not* be shallow
self.assertRevCount(2)
assert not os.path.exists(os.path.join(self.gitdir, '.git',
'shallow'))
As described within PATCH 0/3, this is a proof-of-concept patch.
So one important question is:
* Should the ud.shallow=1 switch be used for that? Then many test cases
need rework.
* Or should another switch be introduced for that (or use ud.shallow=2,
if possible)?
Stefan
On Wed, 2025-01-29 at 12:35 +0100, Alexander Kanavin wrote:
> > This, and the other patches have no tests via bitbake-selftest. Can
> > you look into adding some?
> >
> > Alex
> >
> > On Wed, 29 Jan 2025 at 12:24, Koch, Stefan via
> > lists.openembedded.org
> > <stefan-koch=siemens.com@lists.openembedded.org> wrote:
> > > >
> > > > When `ud.shallow == 1`:
> > > > - Prefer an initial shallow clone over an initial bare clone,
> > > > while still utilizing any already existing bare clones.
> > > >
> > > > This improves:
> > > > - Solves timeout issues during initial clones on slow internet
> > > > connections
> > > > by reducing the amount of data transferred.
> > > > - Eliminates the need to use a HTTPS tarball SRC_URI
> > > > to reduce data transfer.
> > > > - Allows SSH-based authentication (e.g. cert and agent-based)
> > > > when
> > > > using non-public repos, so additional HTTPS tokens may not be
> > > > required.
> > > >
> > > > Signed-off-by: Stefan Koch <stefan-koch@siemens.com>
> > > > ---
> > > > lib/bb/fetch2/git.py | 92
> > > > ++++++++++++++++++++++++++++++++++----------
> > > > 1 file changed, 71 insertions(+), 21 deletions(-)
> > > >
> > > > diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
> > > > index 6badda597..6d87c2f18 100644
> > > > --- a/lib/bb/fetch2/git.py
> > > > +++ b/lib/bb/fetch2/git.py
> > > > @@ -366,6 +366,33 @@ class Git(FetchMethod):
> > > > def tarball_need_update(self, ud):
> > > > return ud.write_tarballs and not
> > > > os.path.exists(ud.fullmirror)
> > > >
> > > > + # Helper method for fetching Git LFS data
> > > > + def lfs_fetch(self, ud, d, clonedir, revision,
> > > > progresshandler, fetchall=False):
> > > > + try:
> > > > + if self._need_lfs(ud) and self._contains_lfs(ud,
> > > > d,
> > > > clonedir) and self._find_git_lfs(d) and len(revision):
> > > > + # Using worktree with the revision because
> > > > .lfsconfig may exists
> > > > + worktree_add_cmd = "%s worktree add wt %s" %
> > > > (ud.basecmd, revision)
> > > > + runfetchcmd(worktree_add_cmd, d,
> > > > log=progresshandler, workdir=clonedir)
> > > > + lfs_fetch_cmd = "%s lfs fetch %s" %
> > > > (ud.basecmd,
> > > > "--all" if fetchall else "")
> > > > + runfetchcmd(lfs_fetch_cmd, d,
> > > > log=progresshandler,
> > > > workdir=(clonedir + "/wt"))
> > > > + worktree_rem_cmd = "%s worktree remove -f wt"
> > > > %
> > > > ud.basecmd
> > > > + runfetchcmd(worktree_rem_cmd, d,
> > > > log=progresshandler, workdir=clonedir)
> > > > + except:
> > > > + logger.warning("Fetching LFS did not succeed.")
> > > > +
> > > > + # Create as a temp file and move atomically into position
> > > > to
> > > > avoid races
> > > > + @contextmanager
> > > > + def create_atomic(self, filename):
> > > > + fd, tfile =
> > > > tempfile.mkstemp(dir=os.path.dirname(filename))
> > > > + try:
> > > > + yield tfile
> > > > + umask = os.umask(0o666)
> > > > + os.umask(umask)
> > > > + os.chmod(tfile, (0o666 & ~umask))
> > > > + os.rename(tfile, filename)
> > > > + finally:
> > > > + os.close(fd)
> > > > +
> > > > def try_premirror(self, ud, d):
> > > > # If we don't do this, updating an existing checkout
> > > > with
> > > > only premirrors
> > > > # is not possible
> > > > @@ -446,7 +473,40 @@ class Git(FetchMethod):
> > > > if ud.proto.lower() != 'file':
> > > > bb.fetch2.check_network_access(d, clone_cmd,
> > > > ud.url)
> > > > progresshandler = GitProgressHandler(d)
> > > > - runfetchcmd(clone_cmd, d, log=progresshandler)
> > > > +
> > > > + # When ud.shallow is enabled:
> > > > + # Try creating an initial shallow clone
> > > > + shallowstate = False
> > > > + if ud.shallow:
> > > > + tempdir =
> > > > tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
> > > > + shallowclone = os.path.join(tempdir, 'git')
> > > > + try:
> > > > + self.clone_shallow_local(ud, shallowclone,
> > > > d)
> > > > + shallowstate = True
> > > > + except:
> > > > + logger.warning("Creating initial shallow
> > > > clone
> > > > failed, try regular clone now.")
> > > > +
> > > > + # When the shallow clone has succeeded:
> > > > + # Create shallow tarball
> > > > + if shallowstate:
> > > > + logger.info("Creating tarball of git
> > > > repository")
> > > > + with self.create_atomic(ud.fullshallow) as
> > > > tfile:
> > > > + runfetchcmd("tar -czf %s ." % tfile,
> > > > d,
> > > > workdir=shallowclone)
> > > > + runfetchcmd("touch %s.done" %
> > > > ud.fullshallow,
> > > > d)
> > > > +
> > > > + # Always cleanup tempdir
> > > > + bb.utils.remove(tempdir, recurse=True)
> > > > +
> > > > + # When the shallow clone has succeeded:
> > > > + # Use shallow tarball
> > > > + if shallowstate:
> > > > + ud.localpath = ud.fullshallow
> > > > + return
> > > > +
> > > > + # When ud.shallow is disabled or the shallow clone
> > > > failed:
> > > > + # Create an initial regular clone
> > > > + if not shallowstate:
> > > > + runfetchcmd(clone_cmd, d, log=progresshandler)
> > > >
> > > > # Update the checkout if needed
> > > > if self.clonedir_need_update(ud, d):
> > > > @@ -509,20 +569,6 @@ class Git(FetchMethod):
> > > > runfetchcmd("tar -cf - lfs | tar -xf - -C
> > > > %s"
> > > > % ud.clonedir, d, workdir="%s/.git" % ud.destdir)
> > > >
> > > > def build_mirror_data(self, ud, d):
> > > > -
> > > > - # Create as a temp file and move atomically into
> > > > position
> > > > to avoid races
> > > > - @contextmanager
> > > > - def create_atomic(filename):
> > > > - fd, tfile =
> > > > tempfile.mkstemp(dir=os.path.dirname(filename))
> > > > - try:
> > > > - yield tfile
> > > > - umask = os.umask(0o666)
> > > > - os.umask(umask)
> > > > - os.chmod(tfile, (0o666 & ~umask))
> > > > - os.rename(tfile, filename)
> > > > - finally:
> > > > - os.close(fd)
> > > > -
> > > > if ud.shallow and ud.write_shallow_tarballs:
> > > > if not os.path.exists(ud.fullshallow):
> > > > if os.path.islink(ud.fullshallow):
> > > > @@ -533,7 +579,7 @@ class Git(FetchMethod):
> > > > self.clone_shallow_local(ud, shallowclone,
> > > > d)
> > > >
> > > > logger.info("Creating tarball of git
> > > > repository")
> > > > - with create_atomic(ud.fullshallow) as
> > > > tfile:
> > > > + with self.create_atomic(ud.fullshallow) as
> > > > tfile:
> > > > runfetchcmd("tar -czf %s ." % tfile,
> > > > d,
> > > > workdir=shallowclone)
> > > > runfetchcmd("touch %s.done" %
> > > > ud.fullshallow,
> > > > d)
> > > > finally:
> > > > @@ -543,7 +589,7 @@ class Git(FetchMethod):
> > > > os.unlink(ud.fullmirror)
> > > >
> > > > logger.info("Creating tarball of git repository")
> > > > - with create_atomic(ud.fullmirror) as tfile:
> > > > + with self.create_atomic(ud.fullmirror) as tfile:
> > > > mtime = runfetchcmd("{} log --all -1
> > > > --format=%cD".format(ud.basecmd), d,
> > > > quiet=True, workdir=ud.clonedir)
> > > > runfetchcmd("tar -czf %s --owner oe:0 --group
> > > > oe:0
> > > > --mtime \"%s\" ."
> > > > @@ -557,12 +603,15 @@ class Git(FetchMethod):
> > > > - For BB_GIT_SHALLOW_REVS: git fetch
> > > > --shallow-exclude=<revs> rev
> > > > """
> > > >
> > > > + progresshandler = GitProgressHandler(d)
> > > > + repourl = self._get_repo_url(ud)
> > > > bb.utils.mkdirhier(dest)
> > > > init_cmd = "%s init -q" % ud.basecmd
> > > > if ud.bareclone:
> > > > init_cmd += " --bare"
> > > > runfetchcmd(init_cmd, d, workdir=dest)
> > > > - runfetchcmd("%s remote add origin %s" % (ud.basecmd,
> > > > ud.clonedir), d, workdir=dest)
> > > > + # Use repourl when creating the initial shallow clone
> > > > + runfetchcmd("%s remote add origin %s" % (ud.basecmd,
> > > > shlex.quote(repourl) if ud.shallow and not
> > > > os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest)
> > > >
> > > > # Check the histories which should be excluded
> > > > shallow_exclude = ''
> > > > @@ -600,10 +649,12 @@ class Git(FetchMethod):
> > > > # The ud.clonedir is a local temporary dir, will
> > > > be
> > > > removed when
> > > > # fetch is done, so we can do anything on it.
> > > > adv_cmd = 'git branch -f advertise-%s %s' %
> > > > (revision,
> > > > revision)
> > > > - runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
> > > > + if not ud.shallow:
> > > > + runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
> > > >
> > > > - runfetchcmd(fetch_cmd, d, workdir=dest)
> > > > + runfetchcmd(fetch_cmd, d, log=progresshandler,
> > > > workdir=dest)
> > > > runfetchcmd("%s update-ref %s %s" % (ud.basecmd,
> > > > ref,
> > > > revision), d, workdir=dest)
> > > > + self.lfs_fetch(ud, d, dest,
> > > > ud.revisions[ud.names[0]],
> > > > progresshandler)
> > > >
> > > > # Apply extra ref wildcards
> > > > all_refs_remote = runfetchcmd("%s ls-remote origin
> > > > 'refs/*'" % ud.basecmd, \
> > > > @@ -629,7 +680,6 @@ class Git(FetchMethod):
> > > > runfetchcmd("%s update-ref %s %s" % (ud.basecmd,
> > > > ref,
> > > > revision), d, workdir=dest)
> > > >
> > > > # The url is local ud.clonedir, set it to upstream one
> > > > - repourl = self._get_repo_url(ud)
> > > > runfetchcmd("%s remote set-url origin %s" %
> > > > (ud.basecmd,
> > > > shlex.quote(repourl)), d, workdir=dest)
> > > >
> > > > def unpack(self, ud, destdir, d):
> > > > --
> > > > 2.39.5
> > > >
> > > >
> > > > -=-=-=-=-=-=-=-=-=-=-=-
> > > > Links: You receive all messages sent to this group.
> > > > View/Reply Online (#17109):
> > > > https://lists.openembedded.org/g/bitbake-devel/message/17109
> > > > Mute This Topic:
> > > > https://lists.openembedded.org/mt/110876221/1686489
> > > > Group Owner: bitbake-devel+owner@lists.openembedded.org
> > > > Unsubscribe:
> > > > https://lists.openembedded.org/g/bitbake-devel/unsub
> > > > [alex.kanavin@gmail.com]
> > > > -=-=-=-=-=-=-=-=-=-=-=-
> > > >
On Thu, 2025-02-06 at 13:45 +0100, Stefan Koch wrote: > Have made some test case experiments and added a functionality to > force > changing ud.shallow by using forceshallow=True. > Otherwise, the test cases have always set ud.shallow by default to 1. > > Commented example from lib/bb/tests/fetch.py: > > def test_shallow_clone_preferred_over_shallow(self): > self.add_empty_file('a') > self.add_empty_file('b') > # Fetch once to generate the shallow tarball > - fetcher, ud = self.fetch() > + fetcher, ud = self.fetch(forceshallow=True) # same as default > behaviour > assert os.path.exists(os.path.join(self.dldir, > ud.mirrortarballs[0])) > + fetcher, ud = self.fetch(forceshallow=False) # needed for test case > (with ud.shallow=1) there is no first clone because of fast shallow > cloning (provided by the patch) > # Fetch and unpack with both the clonedir and shallow tarball > available > - fetcher, ud = self.fetch_and_unpack() > + fetcher, ud = self.fetch_and_unpack(forceshallow=True) # same as > default behaviour > # The unpacked tree should *not* be shallow > self.assertRevCount(2) > assert not os.path.exists(os.path.join(self.gitdir, '.git', > 'shallow')) > > As described within PATCH 0/3, this is a proof-of-concept patch. > > So one important question is: > * Should be the ud.shallow=1 switch used for that? Then many test > cases > needs a rework. Separated test classes for default shallow and fast shallow modes, including common base test cases > * Or should another switch be introduced for that (or use > ud.shallow=2, > if possible)? Added `BB_GIT_SHALLOW_FAST`. When set to "1" for a fast initial shallow fetch, `BB_GIT_SHALLOW` will also be set automatically to "1". > > Stefan > > On Wed, 2025-01-29 at 12:35 +0100, Alexander Kanavin wrote: > > > This, and the other patches have no tests via bitbake-selftest. > > > Can > > > you look into adding some? 
I'll send a new patchset: a) - Adds a new `BB_GIT_SHALLOW_FAST` switch; when set, it will also automatically set `BB_GIT_SHALLOW` - Without `BB_GIT_SHALLOW_FAST` switch; old behaviour: first large bare mirror clone from remote + second small shallow clone from local mirror - With `BB_GIT_SHALLOW_FAST` switch; new behaviour: first small shallow clone from remote b) - Split existing test cases into: 1. common shallow-specific base test cases 2. default (no `BB_GIT_SHALLOW_FAST` mode) shallow-specific test cases 3. fast shallow-specific test cases => common test cases will be executed twice, with and without `BB_GIT_SHALLOW_FAST` mode. => specific test cases will only be executed for either default or fast shallow mode Thanks, Stefan > > > > > > Alex > > > > > > On Wed, 29 Jan 2025 at 12:24, Koch, Stefan via > > > lists.openembedded.org > > > <stefan-koch=siemens.com@lists.openembedded.org> wrote: > > > > > > > > > > When `ud.shallow == 1`: > > > > > - Prefer an initial shallow clone over an initial bare clone, > > > > > while still utilizing any already existing bare clones. > > > > > > > > > > This improves: > > > > > - Solves timeout issues during initial clones on slow > > > > > internet > > > > > connections > > > > > by reducing the amount of data transferred. > > > > > - Eliminates the need to use a HTTPS tarball SRC_URI > > > > > to reduce data transfer. > > > > > - Allows SSH-based authentication (e.g. cert and agent-based) > > > > > when > > > > > using non-public repos, so additional HTTPS tokens may not > > > > > be > > > > > required. 
> > > > > > > > > > Signed-off-by: Stefan Koch <stefan-koch@siemens.com> > > > > > --- > > > > > lib/bb/fetch2/git.py | 92 > > > > > ++++++++++++++++++++++++++++++++++---------- > > > > > 1 file changed, 71 insertions(+), 21 deletions(-) > > > > > > > > > > diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py > > > > > index 6badda597..6d87c2f18 100644 > > > > > --- a/lib/bb/fetch2/git.py > > > > > +++ b/lib/bb/fetch2/git.py > > > > > @@ -366,6 +366,33 @@ class Git(FetchMethod): > > > > > def tarball_need_update(self, ud): > > > > > return ud.write_tarballs and not > > > > > os.path.exists(ud.fullmirror) > > > > > > > > > > + # Helper method for fetching Git LFS data > > > > > + def lfs_fetch(self, ud, d, clonedir, revision, > > > > > progresshandler, fetchall=False): > > > > > + try: > > > > > + if self._need_lfs(ud) and self._contains_lfs(ud, > > > > > d, > > > > > clonedir) and self._find_git_lfs(d) and len(revision): > > > > > + # Using worktree with the revision because > > > > > .lfsconfig may exists > > > > > + worktree_add_cmd = "%s worktree add wt %s" % > > > > > (ud.basecmd, revision) > > > > > + runfetchcmd(worktree_add_cmd, d, > > > > > log=progresshandler, workdir=clonedir) > > > > > + lfs_fetch_cmd = "%s lfs fetch %s" % > > > > > (ud.basecmd, > > > > > "--all" if fetchall else "") > > > > > + runfetchcmd(lfs_fetch_cmd, d, > > > > > log=progresshandler, > > > > > workdir=(clonedir + "/wt")) > > > > > + worktree_rem_cmd = "%s worktree remove -f > > > > > wt" > > > > > % > > > > > ud.basecmd > > > > > + runfetchcmd(worktree_rem_cmd, d, > > > > > log=progresshandler, workdir=clonedir) > > > > > + except: > > > > > + logger.warning("Fetching LFS did not succeed.") > > > > > + > > > > > + # Create as a temp file and move atomically into > > > > > position > > > > > to > > > > > avoid races > > > > > + @contextmanager > > > > > + def create_atomic(self, filename): > > > > > + fd, tfile = > > > > > tempfile.mkstemp(dir=os.path.dirname(filename)) > > > > > 
+ try: > > > > > + yield tfile > > > > > + umask = os.umask(0o666) > > > > > + os.umask(umask) > > > > > + os.chmod(tfile, (0o666 & ~umask)) > > > > > + os.rename(tfile, filename) > > > > > + finally: > > > > > + os.close(fd) > > > > > + > > > > > def try_premirror(self, ud, d): > > > > > # If we don't do this, updating an existing checkout > > > > > with > > > > > only premirrors > > > > > # is not possible > > > > > @@ -446,7 +473,40 @@ class Git(FetchMethod): > > > > > if ud.proto.lower() != 'file': > > > > > bb.fetch2.check_network_access(d, clone_cmd, > > > > > ud.url) > > > > > progresshandler = GitProgressHandler(d) > > > > > - runfetchcmd(clone_cmd, d, log=progresshandler) > > > > > + > > > > > + # When ud.shallow is enabled: > > > > > + # Try creating an initial shallow clone > > > > > + shallowstate = False > > > > > + if ud.shallow: > > > > > + tempdir = > > > > > tempfile.mkdtemp(dir=d.getVar('DL_DIR')) > > > > > + shallowclone = os.path.join(tempdir, 'git') > > > > > + try: > > > > > + self.clone_shallow_local(ud, > > > > > shallowclone, > > > > > d) > > > > > + shallowstate = True > > > > > + except: > > > > > + logger.warning("Creating initial shallow > > > > > clone > > > > > failed, try regular clone now.") > > > > > + > > > > > + # When the shallow clone has succeeded: > > > > > + # Create shallow tarball > > > > > + if shallowstate: > > > > > + logger.info("Creating tarball of git > > > > > repository") > > > > > + with self.create_atomic(ud.fullshallow) > > > > > as > > > > > tfile: > > > > > + runfetchcmd("tar -czf %s ." 
% tfile, > > > > > d, > > > > > workdir=shallowclone) > > > > > + runfetchcmd("touch %s.done" % > > > > > ud.fullshallow, > > > > > d) > > > > > + > > > > > + # Always cleanup tempdir > > > > > + bb.utils.remove(tempdir, recurse=True) > > > > > + > > > > > + # When the shallow clone has succeeded: > > > > > + # Use shallow tarball > > > > > + if shallowstate: > > > > > + ud.localpath = ud.fullshallow > > > > > + return > > > > > + > > > > > + # When ud.shallow is disabled or the shallow > > > > > clone > > > > > failed: > > > > > + # Create an initial regular clone > > > > > + if not shallowstate: > > > > > + runfetchcmd(clone_cmd, d, > > > > > log=progresshandler) > > > > > > > > > > # Update the checkout if needed > > > > > if self.clonedir_need_update(ud, d): > > > > > @@ -509,20 +569,6 @@ class Git(FetchMethod): > > > > > runfetchcmd("tar -cf - lfs | tar -xf - - > > > > > C > > > > > %s" > > > > > % ud.clonedir, d, workdir="%s/.git" % ud.destdir) > > > > > > > > > > def build_mirror_data(self, ud, d): > > > > > - > > > > > - # Create as a temp file and move atomically into > > > > > position > > > > > to avoid races > > > > > - @contextmanager > > > > > - def create_atomic(filename): > > > > > - fd, tfile = > > > > > tempfile.mkstemp(dir=os.path.dirname(filename)) > > > > > - try: > > > > > - yield tfile > > > > > - umask = os.umask(0o666) > > > > > - os.umask(umask) > > > > > - os.chmod(tfile, (0o666 & ~umask)) > > > > > - os.rename(tfile, filename) > > > > > - finally: > > > > > - os.close(fd) > > > > > - > > > > > if ud.shallow and ud.write_shallow_tarballs: > > > > > if not os.path.exists(ud.fullshallow): > > > > > if os.path.islink(ud.fullshallow): > > > > > @@ -533,7 +579,7 @@ class Git(FetchMethod): > > > > > self.clone_shallow_local(ud, > > > > > shallowclone, > > > > > d) > > > > > > > > > > logger.info("Creating tarball of git > > > > > repository") > > > > > - with create_atomic(ud.fullshallow) as > > > > > tfile: > > > > > + with 
self.create_atomic(ud.fullshallow) > > > > > as > > > > > tfile: > > > > > runfetchcmd("tar -czf %s ." % tfile, > > > > > d, > > > > > workdir=shallowclone) > > > > > runfetchcmd("touch %s.done" % > > > > > ud.fullshallow, > > > > > d) > > > > > finally: > > > > > @@ -543,7 +589,7 @@ class Git(FetchMethod): > > > > > os.unlink(ud.fullmirror) > > > > > > > > > > logger.info("Creating tarball of git > > > > > repository") > > > > > - with create_atomic(ud.fullmirror) as tfile: > > > > > + with self.create_atomic(ud.fullmirror) as tfile: > > > > > mtime = runfetchcmd("{} log --all -1 > > > > > --format=%cD".format(ud.basecmd), d, > > > > > quiet=True, workdir=ud.clonedir) > > > > > runfetchcmd("tar -czf %s --owner oe:0 -- > > > > > group > > > > > oe:0 > > > > > --mtime \"%s\" ." > > > > > @@ -557,12 +603,15 @@ class Git(FetchMethod): > > > > > - For BB_GIT_SHALLOW_REVS: git fetch > > > > > --shallow-exclude=<revs> rev > > > > > """ > > > > > > > > > > + progresshandler = GitProgressHandler(d) > > > > > + repourl = self._get_repo_url(ud) > > > > > bb.utils.mkdirhier(dest) > > > > > init_cmd = "%s init -q" % ud.basecmd > > > > > if ud.bareclone: > > > > > init_cmd += " --bare" > > > > > runfetchcmd(init_cmd, d, workdir=dest) > > > > > - runfetchcmd("%s remote add origin %s" % (ud.basecmd, > > > > > ud.clonedir), d, workdir=dest) > > > > > + # Use repourl when creating the initial shallow > > > > > clone > > > > > + runfetchcmd("%s remote add origin %s" % (ud.basecmd, > > > > > shlex.quote(repourl) if ud.shallow and not > > > > > os.path.exists(ud.clonedir) else ud.clonedir), d, > > > > > workdir=dest) > > > > > > > > > > # Check the histories which should be excluded > > > > > shallow_exclude = '' > > > > > @@ -600,10 +649,12 @@ class Git(FetchMethod): > > > > > # The ud.clonedir is a local temporary dir, will > > > > > be > > > > > removed when > > > > > # fetch is done, so we can do anything on it. 
> > > > > adv_cmd = 'git branch -f advertise-%s %s' % > > > > > (revision, > > > > > revision) > > > > > - runfetchcmd(adv_cmd, d, workdir=ud.clonedir) > > > > > + if not ud.shallow: > > > > > + runfetchcmd(adv_cmd, d, workdir=ud.clonedir) > > > > > > > > > > - runfetchcmd(fetch_cmd, d, workdir=dest) > > > > > + runfetchcmd(fetch_cmd, d, log=progresshandler, > > > > > workdir=dest) > > > > > runfetchcmd("%s update-ref %s %s" % (ud.basecmd, > > > > > ref, > > > > > revision), d, workdir=dest) > > > > > + self.lfs_fetch(ud, d, dest, > > > > > ud.revisions[ud.names[0]], > > > > > progresshandler) > > > > > > > > > > # Apply extra ref wildcards > > > > > all_refs_remote = runfetchcmd("%s ls-remote origin > > > > > 'refs/*'" % ud.basecmd, \ > > > > > @@ -629,7 +680,6 @@ class Git(FetchMethod): > > > > > runfetchcmd("%s update-ref %s %s" % (ud.basecmd, > > > > > ref, > > > > > revision), d, workdir=dest) > > > > > > > > > > # The url is local ud.clonedir, set it to upstream > > > > > one > > > > > - repourl = self._get_repo_url(ud) > > > > > runfetchcmd("%s remote set-url origin %s" % > > > > > (ud.basecmd, > > > > > shlex.quote(repourl)), d, workdir=dest) > > > > > > > > > > def unpack(self, ud, destdir, d): > > > > > -- > > > > > 2.39.5 > > > > > > > > > > > > > > > -=-=-=-=-=-=-=-=-=-=-=- > > > > > Links: You receive all messages sent to this group. > > > > > View/Reply Online (#17109): > > > > > https://lists.openembedded.org/g/bitbake-devel/message/17109 > > > > > Mute This Topic: > > > > > https://lists.openembedded.org/mt/110876221/1686489 > > > > > Group Owner: bitbake-devel+owner@lists.openembedded.org > > > > > Unsubscribe: > > > > > https://lists.openembedded.org/g/bitbake-devel/unsub > > > > > [alex.kanavin@gmail.com] > > > > > -=-=-=-=-=-=-=-=-=-=-=- > > > > > > -- Stefan Koch Siemens AG http://www.siemens.com/
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py index 6badda597..6d87c2f18 100644 --- a/lib/bb/fetch2/git.py +++ b/lib/bb/fetch2/git.py @@ -366,6 +366,33 @@ class Git(FetchMethod): def tarball_need_update(self, ud): return ud.write_tarballs and not os.path.exists(ud.fullmirror) + # Helper method for fetching Git LFS data + def lfs_fetch(self, ud, d, clonedir, revision, progresshandler, fetchall=False): + try: + if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and self._find_git_lfs(d) and len(revision): + # Using worktree with the revision because .lfsconfig may exists + worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision) + runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir) + lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "") + runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt")) + worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd + runfetchcmd(worktree_rem_cmd, d, log=progresshandler, workdir=clonedir) + except: + logger.warning("Fetching LFS did not succeed.") + + # Create as a temp file and move atomically into position to avoid races + @contextmanager + def create_atomic(self, filename): + fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) + try: + yield tfile + umask = os.umask(0o666) + os.umask(umask) + os.chmod(tfile, (0o666 & ~umask)) + os.rename(tfile, filename) + finally: + os.close(fd) + def try_premirror(self, ud, d): # If we don't do this, updating an existing checkout with only premirrors # is not possible @@ -446,7 +473,40 @@ class Git(FetchMethod): if ud.proto.lower() != 'file': bb.fetch2.check_network_access(d, clone_cmd, ud.url) progresshandler = GitProgressHandler(d) - runfetchcmd(clone_cmd, d, log=progresshandler) + + # When ud.shallow is enabled: + # Try creating an initial shallow clone + shallowstate = False + if ud.shallow: + tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR')) + shallowclone = os.path.join(tempdir, 
'git') + try: + self.clone_shallow_local(ud, shallowclone, d) + shallowstate = True + except: + logger.warning("Creating initial shallow clone failed, try regular clone now.") + + # When the shallow clone has succeeded: + # Create shallow tarball + if shallowstate: + logger.info("Creating tarball of git repository") + with self.create_atomic(ud.fullshallow) as tfile: + runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone) + runfetchcmd("touch %s.done" % ud.fullshallow, d) + + # Always cleanup tempdir + bb.utils.remove(tempdir, recurse=True) + + # When the shallow clone has succeeded: + # Use shallow tarball + if shallowstate: + ud.localpath = ud.fullshallow + return + + # When ud.shallow is disabled or the shallow clone failed: + # Create an initial regular clone + if not shallowstate: + runfetchcmd(clone_cmd, d, log=progresshandler) # Update the checkout if needed if self.clonedir_need_update(ud, d): @@ -509,20 +569,6 @@ class Git(FetchMethod): runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir) def build_mirror_data(self, ud, d): - - # Create as a temp file and move atomically into position to avoid races - @contextmanager - def create_atomic(filename): - fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename)) - try: - yield tfile - umask = os.umask(0o666) - os.umask(umask) - os.chmod(tfile, (0o666 & ~umask)) - os.rename(tfile, filename) - finally: - os.close(fd) - if ud.shallow and ud.write_shallow_tarballs: if not os.path.exists(ud.fullshallow): if os.path.islink(ud.fullshallow): @@ -533,7 +579,7 @@ class Git(FetchMethod): self.clone_shallow_local(ud, shallowclone, d) logger.info("Creating tarball of git repository") - with create_atomic(ud.fullshallow) as tfile: + with self.create_atomic(ud.fullshallow) as tfile: runfetchcmd("tar -czf %s ." 
% tfile, d, workdir=shallowclone) runfetchcmd("touch %s.done" % ud.fullshallow, d) finally: @@ -543,7 +589,7 @@ class Git(FetchMethod): os.unlink(ud.fullmirror) logger.info("Creating tarball of git repository") - with create_atomic(ud.fullmirror) as tfile: + with self.create_atomic(ud.fullmirror) as tfile: mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d, quiet=True, workdir=ud.clonedir) runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ." @@ -557,12 +603,15 @@ class Git(FetchMethod): - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev """ + progresshandler = GitProgressHandler(d) + repourl = self._get_repo_url(ud) bb.utils.mkdirhier(dest) init_cmd = "%s init -q" % ud.basecmd if ud.bareclone: init_cmd += " --bare" runfetchcmd(init_cmd, d, workdir=dest) - runfetchcmd("%s remote add origin %s" % (ud.basecmd, ud.clonedir), d, workdir=dest) + # Use repourl when creating the initial shallow clone + runfetchcmd("%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl) if ud.shallow and not os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest) # Check the histories which should be excluded shallow_exclude = '' @@ -600,10 +649,12 @@ class Git(FetchMethod): # The ud.clonedir is a local temporary dir, will be removed when # fetch is done, so we can do anything on it. 
adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision) - runfetchcmd(adv_cmd, d, workdir=ud.clonedir) + if not ud.shallow: + runfetchcmd(adv_cmd, d, workdir=ud.clonedir) - runfetchcmd(fetch_cmd, d, workdir=dest) + runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=dest) runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) + self.lfs_fetch(ud, d, dest, ud.revisions[ud.names[0]], progresshandler) # Apply extra ref wildcards all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \ @@ -629,7 +680,6 @@ class Git(FetchMethod): runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest) # The url is local ud.clonedir, set it to upstream one - repourl = self._get_repo_url(ud) runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest) def unpack(self, ud, destdir, d):
When `ud.shallow == 1`: - Prefer an initial shallow clone over an initial bare clone, while still utilizing any already existing bare clones. Benefits: - Solves timeout issues during initial clones on slow internet connections by reducing the amount of data transferred. - Eliminates the need to use an HTTPS tarball SRC_URI to reduce data transfer. - Allows SSH-based authentication (e.g. cert- and agent-based) when using non-public repos, so additional HTTPS tokens may not be required. Signed-off-by: Stefan Koch <stefan-koch@siemens.com> --- lib/bb/fetch2/git.py | 92 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 21 deletions(-)