diff mbox series

[1/3] fetch2/git: Add support for fast initial shallow fetch

Message ID 20250129112406.1660522-2-stefan-koch@siemens.com
State New
Headers show
Series fetch2/git: Improve shallow, lfs, and tag support | expand

Commit Message

Koch, Stefan Jan. 29, 2025, 11:24 a.m. UTC
When `ud.shallow == 1`:
- Prefer an initial shallow clone over an initial bare clone,
  while still utilizing any already existing bare clones.

Benefits:
- Solves timeout issues during initial clones on slow internet connections
  by reducing the amount of data transferred.
- Eliminates the need to use an HTTPS tarball SRC_URI
  to reduce data transfer.
- Allows SSH-based authentication (e.g. cert and agent-based) when
  using non-public repos, so additional HTTPS tokens may not be required.

Signed-off-by: Stefan Koch <stefan-koch@siemens.com>
---
 lib/bb/fetch2/git.py | 92 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 71 insertions(+), 21 deletions(-)

Comments

Alexander Kanavin Jan. 29, 2025, 11:35 a.m. UTC | #1
This and the other patches have no tests in bitbake-selftest. Can
you look into adding some?

Alex

On Wed, 29 Jan 2025 at 12:24, Koch, Stefan via lists.openembedded.org
<stefan-koch=siemens.com@lists.openembedded.org> wrote:
>
> When `ud.shallow == 1`:
> - Prefer an initial shallow clone over an initial bare clone,
>   while still utilizing any already existing bare clones.
>
> This improves:
> - Solves timeout issues during initial clones on slow internet connections
>   by reducing the amount of data transferred.
> - Eliminates the need to use a HTTPS tarball SRC_URI
>   to reduce data transfer.
> - Allows SSH-based authentication (e.g. cert and agent-based) when
>   using non-public repos, so additional HTTPS tokens may not be required.
>
> Signed-off-by: Stefan Koch <stefan-koch@siemens.com>
> ---
>  lib/bb/fetch2/git.py | 92 ++++++++++++++++++++++++++++++++++----------
>  1 file changed, 71 insertions(+), 21 deletions(-)
>
> diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
> index 6badda597..6d87c2f18 100644
> --- a/lib/bb/fetch2/git.py
> +++ b/lib/bb/fetch2/git.py
> @@ -366,6 +366,33 @@ class Git(FetchMethod):
>      def tarball_need_update(self, ud):
>          return ud.write_tarballs and not os.path.exists(ud.fullmirror)
>
> +    # Helper method for fetching Git LFS data
> +    def lfs_fetch(self, ud, d, clonedir, revision, progresshandler, fetchall=False):
> +        try:
> +            if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and self._find_git_lfs(d) and len(revision):
> +                # Using worktree with the revision because .lfsconfig may exists
> +                worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision)
> +                runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir)
> +                lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "")
> +                runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt"))
> +                worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd
> +                runfetchcmd(worktree_rem_cmd, d, log=progresshandler, workdir=clonedir)
> +        except:
> +            logger.warning("Fetching LFS did not succeed.")
> +
> +    # Create as a temp file and move atomically into position to avoid races
> +    @contextmanager
> +    def create_atomic(self, filename):
> +        fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
> +        try:
> +            yield tfile
> +            umask = os.umask(0o666)
> +            os.umask(umask)
> +            os.chmod(tfile, (0o666 & ~umask))
> +            os.rename(tfile, filename)
> +        finally:
> +            os.close(fd)
> +
>      def try_premirror(self, ud, d):
>          # If we don't do this, updating an existing checkout with only premirrors
>          # is not possible
> @@ -446,7 +473,40 @@ class Git(FetchMethod):
>              if ud.proto.lower() != 'file':
>                  bb.fetch2.check_network_access(d, clone_cmd, ud.url)
>              progresshandler = GitProgressHandler(d)
> -            runfetchcmd(clone_cmd, d, log=progresshandler)
> +
> +            # When ud.shallow is enabled:
> +            # Try creating an initial shallow clone
> +            shallowstate = False
> +            if ud.shallow:
> +                tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
> +                shallowclone = os.path.join(tempdir, 'git')
> +                try:
> +                    self.clone_shallow_local(ud, shallowclone, d)
> +                    shallowstate = True
> +                except:
> +                    logger.warning("Creating initial shallow clone failed, try regular clone now.")
> +
> +                # When the shallow clone has succeeded:
> +                # Create shallow tarball
> +                if shallowstate:
> +                    logger.info("Creating tarball of git repository")
> +                    with self.create_atomic(ud.fullshallow) as tfile:
> +                        runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
> +                    runfetchcmd("touch %s.done" % ud.fullshallow, d)
> +
> +                # Always cleanup tempdir
> +                bb.utils.remove(tempdir, recurse=True)
> +
> +                # When the shallow clone has succeeded:
> +                # Use shallow tarball
> +                if shallowstate:
> +                    ud.localpath = ud.fullshallow
> +                    return
> +
> +            # When ud.shallow is disabled or the shallow clone failed:
> +            # Create an initial regular clone
> +            if not shallowstate:
> +                runfetchcmd(clone_cmd, d, log=progresshandler)
>
>          # Update the checkout if needed
>          if self.clonedir_need_update(ud, d):
> @@ -509,20 +569,6 @@ class Git(FetchMethod):
>                      runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir)
>
>      def build_mirror_data(self, ud, d):
> -
> -        # Create as a temp file and move atomically into position to avoid races
> -        @contextmanager
> -        def create_atomic(filename):
> -            fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
> -            try:
> -                yield tfile
> -                umask = os.umask(0o666)
> -                os.umask(umask)
> -                os.chmod(tfile, (0o666 & ~umask))
> -                os.rename(tfile, filename)
> -            finally:
> -                os.close(fd)
> -
>          if ud.shallow and ud.write_shallow_tarballs:
>              if not os.path.exists(ud.fullshallow):
>                  if os.path.islink(ud.fullshallow):
> @@ -533,7 +579,7 @@ class Git(FetchMethod):
>                      self.clone_shallow_local(ud, shallowclone, d)
>
>                      logger.info("Creating tarball of git repository")
> -                    with create_atomic(ud.fullshallow) as tfile:
> +                    with self.create_atomic(ud.fullshallow) as tfile:
>                          runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
>                      runfetchcmd("touch %s.done" % ud.fullshallow, d)
>                  finally:
> @@ -543,7 +589,7 @@ class Git(FetchMethod):
>                  os.unlink(ud.fullmirror)
>
>              logger.info("Creating tarball of git repository")
> -            with create_atomic(ud.fullmirror) as tfile:
> +            with self.create_atomic(ud.fullmirror) as tfile:
>                  mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d,
>                          quiet=True, workdir=ud.clonedir)
>                  runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ."
> @@ -557,12 +603,15 @@ class Git(FetchMethod):
>          - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev
>          """
>
> +        progresshandler = GitProgressHandler(d)
> +        repourl = self._get_repo_url(ud)
>          bb.utils.mkdirhier(dest)
>          init_cmd = "%s init -q" % ud.basecmd
>          if ud.bareclone:
>              init_cmd += " --bare"
>          runfetchcmd(init_cmd, d, workdir=dest)
> -        runfetchcmd("%s remote add origin %s" % (ud.basecmd, ud.clonedir), d, workdir=dest)
> +        # Use repourl when creating the initial shallow clone
> +        runfetchcmd("%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl) if ud.shallow and not os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest)
>
>          # Check the histories which should be excluded
>          shallow_exclude = ''
> @@ -600,10 +649,12 @@ class Git(FetchMethod):
>              # The ud.clonedir is a local temporary dir, will be removed when
>              # fetch is done, so we can do anything on it.
>              adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision)
> -            runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
> +            if not ud.shallow:
> +                runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
>
> -            runfetchcmd(fetch_cmd, d, workdir=dest)
> +            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=dest)
>              runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
> +            self.lfs_fetch(ud, d, dest, ud.revisions[ud.names[0]], progresshandler)
>
>          # Apply extra ref wildcards
>          all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \
> @@ -629,7 +680,6 @@ class Git(FetchMethod):
>              runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
>
>          # The url is local ud.clonedir, set it to upstream one
> -        repourl = self._get_repo_url(ud)
>          runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest)
>
>      def unpack(self, ud, destdir, d):
> --
> 2.39.5
>
>
> -=-=-=-=-=-=-=-=-=-=-=-
> Links: You receive all messages sent to this group.
> View/Reply Online (#17109): https://lists.openembedded.org/g/bitbake-devel/message/17109
> Mute This Topic: https://lists.openembedded.org/mt/110876221/1686489
> Group Owner: bitbake-devel+owner@lists.openembedded.org
> Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [alex.kanavin@gmail.com]
> -=-=-=-=-=-=-=-=-=-=-=-
>
Koch, Stefan Feb. 6, 2025, 12:45 p.m. UTC | #2
I have experimented with the test cases and added an option to force
the value of ud.shallow via a forceshallow argument (e.g. forceshallow=True).
Without it, the test cases always set ud.shallow to 1 by default.

Commented example from lib/bb/tests/fetch.py:

 def test_shallow_clone_preferred_over_shallow(self):
 self.add_empty_file('a')
 self.add_empty_file('b')
 # Fetch once to generate the shallow tarball
- fetcher, ud = self.fetch()
+ fetcher, ud = self.fetch(forceshallow=True) # same as default behaviour
 assert os.path.exists(os.path.join(self.dldir, ud.mirrortarballs[0]))
+ fetcher, ud = self.fetch(forceshallow=False) # needed for test case (with ud.shallow=1) there is no first clone because of fast shallow cloning (provided by the patch)
 # Fetch and unpack with both the clonedir and shallow tarball available
- fetcher, ud = self.fetch_and_unpack()
+ fetcher, ud = self.fetch_and_unpack(forceshallow=True) # same as default behaviour
 # The unpacked tree should *not* be shallow
 self.assertRevCount(2)
 assert not os.path.exists(os.path.join(self.gitdir, '.git', 'shallow'))

As described within PATCH 0/3, this is a proof-of-concept patch.

So one important question is:
* Should the ud.shallow=1 switch be used for that? Then many test cases
need rework.
* Or should another switch be introduced for that (or use ud.shallow=2,
if possible)?

Stefan

On Wed, 2025-01-29 at 12:35 +0100, Alexander Kanavin wrote:
> > This, and the other patches have no tests via bitbake-selftest. Can
> > you look into adding some?
> >
> > Alex
> >
> > On Wed, 29 Jan 2025 at 12:24, Koch, Stefan via
> > lists.openembedded.org
> > <stefan-koch=siemens.com@lists.openembedded.org> wrote:
> > > >
> > > > When `ud.shallow == 1`:
> > > > - Prefer an initial shallow clone over an initial bare clone,
> > > >   while still utilizing any already existing bare clones.
> > > >
> > > > This improves:
> > > > - Solves timeout issues during initial clones on slow internet
> > > > connections
> > > >   by reducing the amount of data transferred.
> > > > - Eliminates the need to use a HTTPS tarball SRC_URI
> > > >   to reduce data transfer.
> > > > - Allows SSH-based authentication (e.g. cert and agent-based)
> > > > when
> > > >   using non-public repos, so additional HTTPS tokens may not be
> > > > required.
> > > >
> > > > Signed-off-by: Stefan Koch <stefan-koch@siemens.com>
> > > > ---
> > > >  lib/bb/fetch2/git.py | 92
> > > > ++++++++++++++++++++++++++++++++++----------
> > > >  1 file changed, 71 insertions(+), 21 deletions(-)
> > > >
> > > > diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
> > > > index 6badda597..6d87c2f18 100644
> > > > --- a/lib/bb/fetch2/git.py
> > > > +++ b/lib/bb/fetch2/git.py
> > > > @@ -366,6 +366,33 @@ class Git(FetchMethod):
> > > >      def tarball_need_update(self, ud):
> > > >          return ud.write_tarballs and not
> > > > os.path.exists(ud.fullmirror)
> > > >
> > > > +    # Helper method for fetching Git LFS data
> > > > +    def lfs_fetch(self, ud, d, clonedir, revision,
> > > > progresshandler, fetchall=False):
> > > > +        try:
> > > > +            if self._need_lfs(ud) and self._contains_lfs(ud,
> > > > d,
> > > > clonedir) and self._find_git_lfs(d) and len(revision):
> > > > +                # Using worktree with the revision because
> > > > .lfsconfig may exists
> > > > +                worktree_add_cmd = "%s worktree add wt %s" %
> > > > (ud.basecmd, revision)
> > > > +                runfetchcmd(worktree_add_cmd, d,
> > > > log=progresshandler, workdir=clonedir)
> > > > +                lfs_fetch_cmd = "%s lfs fetch %s" %
> > > > (ud.basecmd,
> > > > "--all" if fetchall else "")
> > > > +                runfetchcmd(lfs_fetch_cmd, d,
> > > > log=progresshandler,
> > > > workdir=(clonedir + "/wt"))
> > > > +                worktree_rem_cmd = "%s worktree remove -f wt"
> > > > %
> > > > ud.basecmd
> > > > +                runfetchcmd(worktree_rem_cmd, d,
> > > > log=progresshandler, workdir=clonedir)
> > > > +        except:
> > > > +            logger.warning("Fetching LFS did not succeed.")
> > > > +
> > > > +    # Create as a temp file and move atomically into position
> > > > to
> > > > avoid races
> > > > +    @contextmanager
> > > > +    def create_atomic(self, filename):
> > > > +        fd, tfile =
> > > > tempfile.mkstemp(dir=os.path.dirname(filename))
> > > > +        try:
> > > > +            yield tfile
> > > > +            umask = os.umask(0o666)
> > > > +            os.umask(umask)
> > > > +            os.chmod(tfile, (0o666 & ~umask))
> > > > +            os.rename(tfile, filename)
> > > > +        finally:
> > > > +            os.close(fd)
> > > > +
> > > >      def try_premirror(self, ud, d):
> > > >          # If we don't do this, updating an existing checkout
> > > > with
> > > > only premirrors
> > > >          # is not possible
> > > > @@ -446,7 +473,40 @@ class Git(FetchMethod):
> > > >              if ud.proto.lower() != 'file':
> > > >                  bb.fetch2.check_network_access(d, clone_cmd,
> > > > ud.url)
> > > >              progresshandler = GitProgressHandler(d)
> > > > -            runfetchcmd(clone_cmd, d, log=progresshandler)
> > > > +
> > > > +            # When ud.shallow is enabled:
> > > > +            # Try creating an initial shallow clone
> > > > +            shallowstate = False
> > > > +            if ud.shallow:
> > > > +                tempdir =
> > > > tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
> > > > +                shallowclone = os.path.join(tempdir, 'git')
> > > > +                try:
> > > > +                    self.clone_shallow_local(ud, shallowclone,
> > > > d)
> > > > +                    shallowstate = True
> > > > +                except:
> > > > +                    logger.warning("Creating initial shallow
> > > > clone
> > > > failed, try regular clone now.")
> > > > +
> > > > +                # When the shallow clone has succeeded:
> > > > +                # Create shallow tarball
> > > > +                if shallowstate:
> > > > +                    logger.info("Creating tarball of git
> > > > repository")
> > > > +                    with self.create_atomic(ud.fullshallow) as
> > > > tfile:
> > > > +                        runfetchcmd("tar -czf %s ." % tfile,
> > > > d,
> > > > workdir=shallowclone)
> > > > +                    runfetchcmd("touch %s.done" %
> > > > ud.fullshallow,
> > > > d)
> > > > +
> > > > +                # Always cleanup tempdir
> > > > +                bb.utils.remove(tempdir, recurse=True)
> > > > +
> > > > +                # When the shallow clone has succeeded:
> > > > +                # Use shallow tarball
> > > > +                if shallowstate:
> > > > +                    ud.localpath = ud.fullshallow
> > > > +                    return
> > > > +
> > > > +            # When ud.shallow is disabled or the shallow clone
> > > > failed:
> > > > +            # Create an initial regular clone
> > > > +            if not shallowstate:
> > > > +                runfetchcmd(clone_cmd, d, log=progresshandler)
> > > >
> > > >          # Update the checkout if needed
> > > >          if self.clonedir_need_update(ud, d):
> > > > @@ -509,20 +569,6 @@ class Git(FetchMethod):
> > > >                      runfetchcmd("tar -cf - lfs | tar -xf - -C
> > > > %s"
> > > > % ud.clonedir, d, workdir="%s/.git" % ud.destdir)
> > > >
> > > >      def build_mirror_data(self, ud, d):
> > > > -
> > > > -        # Create as a temp file and move atomically into
> > > > position
> > > > to avoid races
> > > > -        @contextmanager
> > > > -        def create_atomic(filename):
> > > > -            fd, tfile =
> > > > tempfile.mkstemp(dir=os.path.dirname(filename))
> > > > -            try:
> > > > -                yield tfile
> > > > -                umask = os.umask(0o666)
> > > > -                os.umask(umask)
> > > > -                os.chmod(tfile, (0o666 & ~umask))
> > > > -                os.rename(tfile, filename)
> > > > -            finally:
> > > > -                os.close(fd)
> > > > -
> > > >          if ud.shallow and ud.write_shallow_tarballs:
> > > >              if not os.path.exists(ud.fullshallow):
> > > >                  if os.path.islink(ud.fullshallow):
> > > > @@ -533,7 +579,7 @@ class Git(FetchMethod):
> > > >                      self.clone_shallow_local(ud, shallowclone,
> > > > d)
> > > >
> > > >                      logger.info("Creating tarball of git
> > > > repository")
> > > > -                    with create_atomic(ud.fullshallow) as
> > > > tfile:
> > > > +                    with self.create_atomic(ud.fullshallow) as
> > > > tfile:
> > > >                          runfetchcmd("tar -czf %s ." % tfile,
> > > > d,
> > > > workdir=shallowclone)
> > > >                      runfetchcmd("touch %s.done" %
> > > > ud.fullshallow,
> > > > d)
> > > >                  finally:
> > > > @@ -543,7 +589,7 @@ class Git(FetchMethod):
> > > >                  os.unlink(ud.fullmirror)
> > > >
> > > >              logger.info("Creating tarball of git repository")
> > > > -            with create_atomic(ud.fullmirror) as tfile:
> > > > +            with self.create_atomic(ud.fullmirror) as tfile:
> > > >                  mtime = runfetchcmd("{} log --all -1
> > > > --format=%cD".format(ud.basecmd), d,
> > > >                          quiet=True, workdir=ud.clonedir)
> > > >                  runfetchcmd("tar -czf %s --owner oe:0 --group
> > > > oe:0
> > > > --mtime \"%s\" ."
> > > > @@ -557,12 +603,15 @@ class Git(FetchMethod):
> > > >          - For BB_GIT_SHALLOW_REVS: git fetch
> > > > --shallow-exclude=<revs> rev
> > > >          """
> > > >
> > > > +        progresshandler = GitProgressHandler(d)
> > > > +        repourl = self._get_repo_url(ud)
> > > >          bb.utils.mkdirhier(dest)
> > > >          init_cmd = "%s init -q" % ud.basecmd
> > > >          if ud.bareclone:
> > > >              init_cmd += " --bare"
> > > >          runfetchcmd(init_cmd, d, workdir=dest)
> > > > -        runfetchcmd("%s remote add origin %s" % (ud.basecmd,
> > > > ud.clonedir), d, workdir=dest)
> > > > +        # Use repourl when creating the initial shallow clone
> > > > +        runfetchcmd("%s remote add origin %s" % (ud.basecmd,
> > > > shlex.quote(repourl) if ud.shallow and not
> > > > os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest)
> > > >
> > > >          # Check the histories which should be excluded
> > > >          shallow_exclude = ''
> > > > @@ -600,10 +649,12 @@ class Git(FetchMethod):
> > > >              # The ud.clonedir is a local temporary dir, will
> > > > be
> > > > removed when
> > > >              # fetch is done, so we can do anything on it.
> > > >              adv_cmd = 'git branch -f advertise-%s %s' %
> > > > (revision,
> > > > revision)
> > > > -            runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
> > > > +            if not ud.shallow:
> > > > +                runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
> > > >
> > > > -            runfetchcmd(fetch_cmd, d, workdir=dest)
> > > > +            runfetchcmd(fetch_cmd, d, log=progresshandler,
> > > > workdir=dest)
> > > >              runfetchcmd("%s update-ref %s %s" % (ud.basecmd,
> > > > ref,
> > > > revision), d, workdir=dest)
> > > > +            self.lfs_fetch(ud, d, dest,
> > > > ud.revisions[ud.names[0]],
> > > > progresshandler)
> > > >
> > > >          # Apply extra ref wildcards
> > > >          all_refs_remote = runfetchcmd("%s ls-remote origin
> > > > 'refs/*'" % ud.basecmd, \
> > > > @@ -629,7 +680,6 @@ class Git(FetchMethod):
> > > >              runfetchcmd("%s update-ref %s %s" % (ud.basecmd,
> > > > ref,
> > > > revision), d, workdir=dest)
> > > >
> > > >          # The url is local ud.clonedir, set it to upstream one
> > > > -        repourl = self._get_repo_url(ud)
> > > >          runfetchcmd("%s remote set-url origin %s" %
> > > > (ud.basecmd,
> > > > shlex.quote(repourl)), d, workdir=dest)
> > > >
> > > >      def unpack(self, ud, destdir, d):
> > > > --
> > > > 2.39.5
> > > >
> > > >
> > > > -=-=-=-=-=-=-=-=-=-=-=-
> > > > Links: You receive all messages sent to this group.
> > > > View/Reply Online (#17109):
> > > > https://lists.openembedded.org/g/bitbake-devel/message/17109
> > > > Mute This Topic:
> > > > https://lists.openembedded.org/mt/110876221/1686489
> > > > Group Owner: bitbake-devel+owner@lists.openembedded.org
> > > > Unsubscribe:
> > > > https://lists.openembedded.org/g/bitbake-devel/unsub
> > > >  [alex.kanavin@gmail.com]
> > > > -=-=-=-=-=-=-=-=-=-=-=-
> > > >
diff mbox series

Patch

diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py
index 6badda597..6d87c2f18 100644
--- a/lib/bb/fetch2/git.py
+++ b/lib/bb/fetch2/git.py
@@ -366,6 +366,33 @@  class Git(FetchMethod):
     def tarball_need_update(self, ud):
         return ud.write_tarballs and not os.path.exists(ud.fullmirror)
 
+    # Helper method for fetching Git LFS data
+    def lfs_fetch(self, ud, d, clonedir, revision, progresshandler, fetchall=False):
+        try:
+            if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and self._find_git_lfs(d) and len(revision):
+                # Using worktree with the revision because .lfsconfig may exists
+                worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision)
+                runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir)
+                lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "")
+                runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt"))
+                worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd
+                runfetchcmd(worktree_rem_cmd, d, log=progresshandler, workdir=clonedir)
+        except:
+            logger.warning("Fetching LFS did not succeed.")
+
+    # Create as a temp file and move atomically into position to avoid races
+    @contextmanager
+    def create_atomic(self, filename):
+        fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
+        try:
+            yield tfile
+            umask = os.umask(0o666)
+            os.umask(umask)
+            os.chmod(tfile, (0o666 & ~umask))
+            os.rename(tfile, filename)
+        finally:
+            os.close(fd)
+
     def try_premirror(self, ud, d):
         # If we don't do this, updating an existing checkout with only premirrors
         # is not possible
@@ -446,7 +473,40 @@  class Git(FetchMethod):
             if ud.proto.lower() != 'file':
                 bb.fetch2.check_network_access(d, clone_cmd, ud.url)
             progresshandler = GitProgressHandler(d)
-            runfetchcmd(clone_cmd, d, log=progresshandler)
+
+            # When ud.shallow is enabled:
+            # Try creating an initial shallow clone
+            shallowstate = False
+            if ud.shallow:
+                tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
+                shallowclone = os.path.join(tempdir, 'git')
+                try:
+                    self.clone_shallow_local(ud, shallowclone, d)
+                    shallowstate = True
+                except:
+                    logger.warning("Creating initial shallow clone failed, try regular clone now.")
+
+                # When the shallow clone has succeeded:
+                # Create shallow tarball
+                if shallowstate:
+                    logger.info("Creating tarball of git repository")
+                    with self.create_atomic(ud.fullshallow) as tfile:
+                        runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
+                    runfetchcmd("touch %s.done" % ud.fullshallow, d)
+
+                # Always cleanup tempdir
+                bb.utils.remove(tempdir, recurse=True)
+
+                # When the shallow clone has succeeded:
+                # Use shallow tarball
+                if shallowstate:
+                    ud.localpath = ud.fullshallow
+                    return
+
+            # When ud.shallow is disabled or the shallow clone failed:
+            # Create an initial regular clone
+            if not shallowstate:
+                runfetchcmd(clone_cmd, d, log=progresshandler)
 
         # Update the checkout if needed
         if self.clonedir_need_update(ud, d):
@@ -509,20 +569,6 @@  class Git(FetchMethod):
                     runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir)
 
     def build_mirror_data(self, ud, d):
-
-        # Create as a temp file and move atomically into position to avoid races
-        @contextmanager
-        def create_atomic(filename):
-            fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
-            try:
-                yield tfile
-                umask = os.umask(0o666)
-                os.umask(umask)
-                os.chmod(tfile, (0o666 & ~umask))
-                os.rename(tfile, filename)
-            finally:
-                os.close(fd)
-
         if ud.shallow and ud.write_shallow_tarballs:
             if not os.path.exists(ud.fullshallow):
                 if os.path.islink(ud.fullshallow):
@@ -533,7 +579,7 @@  class Git(FetchMethod):
                     self.clone_shallow_local(ud, shallowclone, d)
 
                     logger.info("Creating tarball of git repository")
-                    with create_atomic(ud.fullshallow) as tfile:
+                    with self.create_atomic(ud.fullshallow) as tfile:
                         runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
                     runfetchcmd("touch %s.done" % ud.fullshallow, d)
                 finally:
@@ -543,7 +589,7 @@  class Git(FetchMethod):
                 os.unlink(ud.fullmirror)
 
             logger.info("Creating tarball of git repository")
-            with create_atomic(ud.fullmirror) as tfile:
+            with self.create_atomic(ud.fullmirror) as tfile:
                 mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d,
                         quiet=True, workdir=ud.clonedir)
                 runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ."
@@ -557,12 +603,15 @@  class Git(FetchMethod):
         - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev
         """
 
+        progresshandler = GitProgressHandler(d)
+        repourl = self._get_repo_url(ud)
         bb.utils.mkdirhier(dest)
         init_cmd = "%s init -q" % ud.basecmd
         if ud.bareclone:
             init_cmd += " --bare"
         runfetchcmd(init_cmd, d, workdir=dest)
-        runfetchcmd("%s remote add origin %s" % (ud.basecmd, ud.clonedir), d, workdir=dest)
+        # Use repourl when creating the initial shallow clone
+        runfetchcmd("%s remote add origin %s" % (ud.basecmd, shlex.quote(repourl) if ud.shallow and not os.path.exists(ud.clonedir) else ud.clonedir), d, workdir=dest)
 
         # Check the histories which should be excluded
         shallow_exclude = ''
@@ -600,10 +649,12 @@  class Git(FetchMethod):
             # The ud.clonedir is a local temporary dir, will be removed when
             # fetch is done, so we can do anything on it.
             adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision)
-            runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
+            if not ud.shallow:
+                runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
 
-            runfetchcmd(fetch_cmd, d, workdir=dest)
+            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=dest)
             runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
+            self.lfs_fetch(ud, d, dest, ud.revisions[ud.names[0]], progresshandler)
 
         # Apply extra ref wildcards
         all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \
@@ -629,7 +680,6 @@  class Git(FetchMethod):
             runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
 
         # The url is local ud.clonedir, set it to upstream one
-        repourl = self._get_repo_url(ud)
         runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest)
 
     def unpack(self, ud, destdir, d):