Message ID | 20220207115322.28209-1-pavel@zhukoff.net |
---|---|
State | New |
Headers | show |
Series | fetch: Use git rev-parse if network is not available | expand |
Hi Pavel, On 2/7/22 12:53, Pavel Zhukov wrote: > Sometimes Fetcher is initialized while network is disabled (unpack() is > one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one. > In such cases git ls-remote cannot be used to determinate latest > revision and git rev-parse can be used instead. This approach doesn't > work with rebaseable repos so exception will occur for such repos. > > See [YOCTO #14707] for more details. Note: the issue doesn't occur each > time in unpack() because in normal flow (fetch->unpack in one build) > URI_HEADREVs are cached. > > Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com> > --- > lib/bb/fetch2/git.py | 61 +++++++++++++++++++++++++++++++++---------- > lib/bb/tests/fetch.py | 43 ++++++++++++++++++++++++++++++ > 2 files changed, 90 insertions(+), 14 deletions(-) > > diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py > index 30da8e95..5f56d9fe 100644 > --- a/lib/bb/fetch2/git.py > +++ b/lib/bb/fetch2/git.py > @@ -122,6 +122,10 @@ class Git(FetchMethod): > def init(self, d): > pass > > + @staticmethod > + def _is_sha1(name): > + return len(name) == 40 and (True in [c in "abcdef0123456789" for c in name]) > + return re.fullmatch(r'[a-f0-9]{40}', name) is not None should actually do the trick. Might even want to include uppercase letters with A-F. Maybe we want this to be a compiled re pattern too since it's probably going to be used "a bit" :) (also pretty sure it should have been "and not(False in..)" because here if only ONE character is hex but not the rest, it still returns True AFAICT?) Cheers, Quentin > def supports(self, ud, d): > """ > Check to see if a given url can be fetched with git. > @@ -135,6 +139,8 @@ class Git(FetchMethod): > """ > init git specific variable within url data > so that the git method like latest_revision() can work > + NOTE: This function may be called from do_unpack and in that case network is not available > + avoid using network operations here or guard them properly. > """ > if 'protocol' in ud.parm: > ud.proto = ud.parm['protocol'] > @@ -248,28 +254,33 @@ class Git(FetchMethod): > > ud.setup_revisions(d) > > - for name in ud.names: > - # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one > - if not ud.revisions[name] or len(ud.revisions[name]) != 40 or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]): > - if ud.revisions[name]: > - ud.unresolvedrev[name] = ud.revisions[name] > - ud.revisions[name] = self.latest_revision(ud, d, name) > - > gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_')) > if gitsrcname.startswith('.'): > gitsrcname = gitsrcname[1:] > > - # for rebaseable git repo, it is necessary to keep mirror tar ball > - # per revision, so that even the revision disappears from the > - # upstream repo in the future, the mirror will remain intact and still > - # contains the revision > - if ud.rebaseable: > - for name in ud.names: > - gitsrcname = gitsrcname + '_' + ud.revisions[name] > > dl_dir = d.getVar("DL_DIR") > gitdir = d.getVar("GITDIR") or (dl_dir + "/git2") > + > + > + ## We need clonedir to be defined before latest_revision call for unpack routine to use rev-parse instead of ls-remote > ud.clonedir = os.path.join(gitdir, gitsrcname) > + > + for name in ud.names: > + # Ensure anything that doesn't look like a sha1 checksum/revision is translated into one > + if not ud.revisions[name] or not self._is_sha1(ud.revisions[name]): > + if ud.revisions[name]: > + ud.unresolvedrev[name] = ud.revisions[name] > + ud.revisions[name] = self.latest_revision(ud, d, name) > + if ud.rebaseable: > + # for rebaseable git repo, it is necessary to keep mirror tar ball > + # per revision, so that even the revision disappears from the > + # upstream repo in the future, the mirror will remain intact and still > + # contains the revision > + gitsrcname = gitsrcname + '_' + ud.revisions[name] > + if ud.rebaseable: > + ud.clonedir = os.path.join(gitdir, gitsrcname) > + > ud.localfile = ud.clonedir > > mirrortarball = 'git2_%s.tar.gz' % gitsrcname > @@ -697,6 +708,14 @@ class Git(FetchMethod): > slash_re = re.compile(r"/+") > return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name] > > + def _revparse(self, ud, d, rev): > + """ > + Return latest revision (SHA) for the refs specified by branch/tag/etc > + """ > + cmd = '{basecmd} rev-parse -q --verify {rev}'.format(basecmd = ud.basecmd, rev = rev) > + output = runfetchcmd(cmd, d, quiet=True, workdir = ud.clonedir) > + return output.strip() > + > def _lsremote(self, ud, d, search): > """ > Run git ls-remote with the specified search string > @@ -727,6 +746,20 @@ class Git(FetchMethod): > """ > Compute the HEAD revision for the url > """ > + > + ## Network is not availble and we missed the cache. Try rev-parse > + if d.getVar("BB_NO_NETWORK"): > + if ud.rebaseable: > + raise bb.fetch.UnpackError("Unable to map revision \"{name}\" to SHA1 revision without a network. " \ > + "SHA1 revision must be specified for rebaseable repos "\ > + "or \"BB_SRCREV_POLICY\" set to \"cache\". ".format(name=ud.unresolvedrev[name]),ud.host+ud.path) > + rev = self._revparse(ud, d, ud.unresolvedrev[name]) > + if rev is None: > + raise bb.fetch2.FetchError("Unable to resolve '%s' in local clone of repository in git rev-parse output for %s" % \ > + (ud.unresolvedrev[name], ud.host+ud.path)) > + return rev > + > + > output = self._lsremote(ud, d, "") > # Tags of the form ^{} may not work, need to fallback to other form > if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead: > diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py > index ec7d83c9..1fcf3466 100644 > --- a/lib/bb/tests/fetch.py > +++ b/lib/bb/tests/fetch.py > @@ -2776,3 +2776,46 @@ class GitSharedTest(FetcherTest): > fetcher.unpack(self.unpackdir) > alt = os.path.join(self.unpackdir, 'git/.git/objects/info/alternates') > self.assertFalse(os.path.exists(alt)) > + > +class GitUnpackOfflineTest(FetcherTest): > + > + def setUp(self): > + super(GitUnpackOfflineTest, self).setUp() > + self.tempdir = self.tempdir > + self.persistdir = os.path.join(self.tempdir, "persistdata") > + > + def setData(self): > + self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;" > + self.d.setVar('SRCREV', 'v1.9.0') > + self.d.setVar("PERSISTENT_DIR", self.persistdir) > + self.d.setVar("DL_DIR", self.dldir) > + > + @skipIfNoNetwork() > + def test_unpack_offline(self): > + self.setData() > + fetcher = bb.fetch.Fetch([self.recipe_url], self.d) > + bb.fetch.fetcher_init(self.d) > + fetcher.download() > + fetcher.unpack(self.unpackdir) > + > + ## emulate do_unpack (disable network) > + self.d.setVar("BB_NO_NETWORK", "1") > + unpacker = bb.fetch.Fetch([self.recipe_url], self.d) > + bb.fetch.fetcher_init(self.d) > + unpacker.unpack(self.unpackdir) > + > + @skipIfNoNetwork() > + def test_unpack_rebaseable(self): > + self.setData() > + self.recipe_url += "rebaseable=1;" > + > + fetcher = bb.fetch.Fetch([self.recipe_url], self.d) > + bb.fetch.fetcher_init(self.d) > + fetcher.download() > + > + ## emulate do_unpack (disable network) > + self.d.setVar("BB_NO_NETWORK", "1") > + with self.assertRaises(bb.fetch2.UnpackError): > + unpacker = bb.fetch.Fetch([self.recipe_url], self.d) > + bb.fetch.fetcher_init(self.d) > + fetcher.unpack() > > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#13317): https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.openembedded.org_g_bitbake-2Ddevel_message_13317&d=DwIFaQ&c=_sEr5x9kUWhuk4_nFwjJtA&r=LYjLexDn7rXIzVmkNPvw5ymA1XTSqHGq8yBP6m6qZZ4njZguQhZhkI_-172IIy1t&m=TAHnwIUCuZPQ7vBGmkvVBkcWKm3prVlA6UFcViaqYEScF_lgqMYpoqeboKd-qJeH&s=kNFDHQIcWkeZ-Ld6-oALgJMeOKvcmoWJh12dcki4Tg4&e= > Mute This Topic: https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.openembedded.org_mt_88969671_6293953&d=DwIFaQ&c=_sEr5x9kUWhuk4_nFwjJtA&r=LYjLexDn7rXIzVmkNPvw5ymA1XTSqHGq8yBP6m6qZZ4njZguQhZhkI_-172IIy1t&m=TAHnwIUCuZPQ7vBGmkvVBkcWKm3prVlA6UFcViaqYEScF_lgqMYpoqeboKd-qJeH&s=G2M6sBkCY4DYUNyfIHrwHZ6ZewTA8ayM1rPtPsE2w-Q&e= > Group Owner: bitbake-devel+owner@lists.openembedded.org > Unsubscribe: https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.openembedded.org_g_bitbake-2Ddevel_unsub&d=DwIFaQ&c=_sEr5x9kUWhuk4_nFwjJtA&r=LYjLexDn7rXIzVmkNPvw5ymA1XTSqHGq8yBP6m6qZZ4njZguQhZhkI_-172IIy1t&m=TAHnwIUCuZPQ7vBGmkvVBkcWKm3prVlA6UFcViaqYEScF_lgqMYpoqeboKd-qJeH&s=disYGNo9s0vbsx-5RloToeQhYcZwV84Oy_dSYrIttAM&e= [quentin.schulz@theobroma-systems.com] > -=-=-=-=-=-=-=-=-=-=-=- >
On Mon, 2022-02-07 at 12:53 +0100, Pavel Zhukov wrote: > Sometimes Fetcher is initialized while network is disabled (unpack() is > one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one. > In such cases git ls-remote cannot be used to determinate latest > revision and git rev-parse can be used instead. This approach doesn't > work with rebaseable repos so exception will occur for such repos. > > See [YOCTO #14707] for more details. Note: the issue doesn't occur each > time in unpack() because in normal flow (fetch->unpack in one build) > URI_HEADREVs are cached. > > Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com> > --- Thanks for looking at this. I like the idea of some tests to demonstrate the problem so we should start there. > +class GitUnpackOfflineTest(FetcherTest): > + > + def setUp(self): > + super(GitUnpackOfflineTest, self).setUp() > + self.tempdir = self.tempdir > + self.persistdir = os.path.join(self.tempdir, "persistdata") > + > + def setData(self): > + self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;" > + self.d.setVar('SRCREV', 'v1.9.0') > + self.d.setVar("PERSISTENT_DIR", self.persistdir) > + self.d.setVar("DL_DIR", self.dldir) > + > + @skipIfNoNetwork() > + def test_unpack_offline(self): > + self.setData() > + fetcher = bb.fetch.Fetch([self.recipe_url], self.d) > + bb.fetch.fetcher_init(self.d) > + fetcher.download() > + fetcher.unpack(self.unpackdir) > + > + ## emulate do_unpack (disable network) > + self.d.setVar("BB_NO_NETWORK", "1") > + unpacker = bb.fetch.Fetch([self.recipe_url], self.d) > + bb.fetch.fetcher_init(self.d) > + unpacker.unpack(self.unpackdir) > + I don't think this is a valid test. I think it should only call bb.fetch.fetcher_init() once and at the start of the test before the Fetch() call. If we change it to do that, the test passes. Going back to your original bug report, the issue could be that bb.fetch.fetcher_init() is being called in the do_unpack task and "destroying" the source revs in the worker context. I suspect but haven't looked into that yet. What we may need to do is transfer the bb.fetch2.saved_headrevs into the worker context? Cheers, Richard
diff --git a/lib/bb/fetch2/git.py b/lib/bb/fetch2/git.py index 30da8e95..5f56d9fe 100644 --- a/lib/bb/fetch2/git.py +++ b/lib/bb/fetch2/git.py @@ -122,6 +122,10 @@ class Git(FetchMethod): def init(self, d): pass + @staticmethod + def _is_sha1(name): + return len(name) == 40 and (True in [c in "abcdef0123456789" for c in name]) + def supports(self, ud, d): """ Check to see if a given url can be fetched with git. @@ -135,6 +139,8 @@ class Git(FetchMethod): """ init git specific variable within url data so that the git method like latest_revision() can work + NOTE: This function may be called from do_unpack and in that case network is not available + avoid using network operations here or guard them properly. """ if 'protocol' in ud.parm: ud.proto = ud.parm['protocol'] @@ -248,28 +254,33 @@ class Git(FetchMethod): ud.setup_revisions(d) - for name in ud.names: - # Ensure anything that doesn't look like a sha256 checksum/revision is translated into one - if not ud.revisions[name] or len(ud.revisions[name]) != 40 or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]): - if ud.revisions[name]: - ud.unresolvedrev[name] = ud.revisions[name] - ud.revisions[name] = self.latest_revision(ud, d, name) - gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_')) if gitsrcname.startswith('.'): gitsrcname = gitsrcname[1:] - # for rebaseable git repo, it is necessary to keep mirror tar ball - # per revision, so that even the revision disappears from the - # upstream repo in the future, the mirror will remain intact and still - # contains the revision - if ud.rebaseable: - for name in ud.names: - gitsrcname = gitsrcname + '_' + ud.revisions[name] dl_dir = d.getVar("DL_DIR") gitdir = d.getVar("GITDIR") or (dl_dir + "/git2") + + + ## We need clonedir to be defined before latest_revision call for unpack routine to use rev-parse instead of ls-remote ud.clonedir = os.path.join(gitdir, gitsrcname) + + for name in ud.names: + # Ensure anything that doesn't look like a sha1 checksum/revision is translated into one + if not ud.revisions[name] or not self._is_sha1(ud.revisions[name]): + if ud.revisions[name]: + ud.unresolvedrev[name] = ud.revisions[name] + ud.revisions[name] = self.latest_revision(ud, d, name) + if ud.rebaseable: + # for rebaseable git repo, it is necessary to keep mirror tar ball + # per revision, so that even the revision disappears from the + # upstream repo in the future, the mirror will remain intact and still + # contains the revision + gitsrcname = gitsrcname + '_' + ud.revisions[name] + if ud.rebaseable: + ud.clonedir = os.path.join(gitdir, gitsrcname) + ud.localfile = ud.clonedir mirrortarball = 'git2_%s.tar.gz' % gitsrcname @@ -697,6 +708,14 @@ class Git(FetchMethod): slash_re = re.compile(r"/+") return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name] + def _revparse(self, ud, d, rev): + """ + Return latest revision (SHA) for the refs specified by branch/tag/etc + """ + cmd = '{basecmd} rev-parse -q --verify {rev}'.format(basecmd = ud.basecmd, rev = rev) + output = runfetchcmd(cmd, d, quiet=True, workdir = ud.clonedir) + return output.strip() + def _lsremote(self, ud, d, search): """ Run git ls-remote with the specified search string @@ -727,6 +746,20 @@ class Git(FetchMethod): """ Compute the HEAD revision for the url """ + + ## Network is not availble and we missed the cache. Try rev-parse + if d.getVar("BB_NO_NETWORK"): + if ud.rebaseable: + raise bb.fetch.UnpackError("Unable to map revision \"{name}\" to SHA1 revision without a network. " \ + "SHA1 revision must be specified for rebaseable repos "\ + "or \"BB_SRCREV_POLICY\" set to \"cache\". ".format(name=ud.unresolvedrev[name]),ud.host+ud.path) + rev = self._revparse(ud, d, ud.unresolvedrev[name]) + if rev is None: + raise bb.fetch2.FetchError("Unable to resolve '%s' in local clone of repository in git rev-parse output for %s" % \ + (ud.unresolvedrev[name], ud.host+ud.path)) + return rev + + output = self._lsremote(ud, d, "") # Tags of the form ^{} may not work, need to fallback to other form if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead: diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py index ec7d83c9..1fcf3466 100644 --- a/lib/bb/tests/fetch.py +++ b/lib/bb/tests/fetch.py @@ -2776,3 +2776,46 @@ class GitSharedTest(FetcherTest): fetcher.unpack(self.unpackdir) alt = os.path.join(self.unpackdir, 'git/.git/objects/info/alternates') self.assertFalse(os.path.exists(alt)) + +class GitUnpackOfflineTest(FetcherTest): + + def setUp(self): + super(GitUnpackOfflineTest, self).setUp() + self.tempdir = self.tempdir + self.persistdir = os.path.join(self.tempdir, "persistdata") + + def setData(self): + self.recipe_url = "git://github.com/lz4/lz4.git;branch=release;protocol=https;" + self.d.setVar('SRCREV', 'v1.9.0') + self.d.setVar("PERSISTENT_DIR", self.persistdir) + self.d.setVar("DL_DIR", self.dldir) + + @skipIfNoNetwork() + def test_unpack_offline(self): + self.setData() + fetcher = bb.fetch.Fetch([self.recipe_url], self.d) + bb.fetch.fetcher_init(self.d) + fetcher.download() + fetcher.unpack(self.unpackdir) + + ## emulate do_unpack (disable network) + self.d.setVar("BB_NO_NETWORK", "1") + unpacker = bb.fetch.Fetch([self.recipe_url], self.d) + bb.fetch.fetcher_init(self.d) + unpacker.unpack(self.unpackdir) + + @skipIfNoNetwork() + def test_unpack_rebaseable(self): + self.setData() + self.recipe_url += "rebaseable=1;" + + fetcher = bb.fetch.Fetch([self.recipe_url], self.d) + bb.fetch.fetcher_init(self.d) + fetcher.download() + + ## emulate do_unpack (disable network) + self.d.setVar("BB_NO_NETWORK", "1") + with self.assertRaises(bb.fetch2.UnpackError): + unpacker = bb.fetch.Fetch([self.recipe_url], self.d) + bb.fetch.fetcher_init(self.d) + fetcher.unpack()
Sometimes Fetcher is initialized while network is disabled (unpack() is one of such cases) and Fetcher fails to map non SHA1 SRCREV to SHA1 one. In such cases git ls-remote cannot be used to determinate latest revision and git rev-parse can be used instead. This approach doesn't work with rebaseable repos so exception will occur for such repos. See [YOCTO #14707] for more details. Note: the issue doesn't occur each time in unpack() because in normal flow (fetch->unpack in one build) URI_HEADREVs are cached. Signed-off-by: Pavel Zhukov <pavel.zhukov@huawei.com> --- lib/bb/fetch2/git.py | 61 +++++++++++++++++++++++++++++++++---------- lib/bb/tests/fetch.py | 43 ++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 14 deletions(-)