Message ID | 20240209160450.2871718-1-thomas.perrot@bootlin.com |
---|---|
State | New |
Headers | show |
Series | [bitbake-devel,v2] wget.py: always use the custom user agent | expand |
Dear maintainers, Could we please revert this commit? I'd propose to enable this via a variable on a per-recipe basis for those recipes which point their SRC_URI to misbehaving servers. Sending a browser user-agent causes JFrog Artifactory to present a GUI as a result and not directly the file itself. I think that in corporate environments using Artifactory for storing binary files is not that uncommon. And I think that there may also be other servers which try to beautify downloads for browsers. Thanks for considering this. Peter -----Original Message----- From: bitbake-devel@lists.openembedded.org <bitbake-devel@lists.openembedded.org> On Behalf Of Thomas Perrot via lists.openembedded.org Sent: Friday, February 9, 2024 17:05 To: bitbake-devel@lists.openembedded.org Cc: Thomas Perrot <thomas.perrot@bootlin.com> Subject: [bitbake-devel][PATCH v2] wget.py: always use the custom user agent From: Thomas Perrot <thomas.perrot@bootlin.com> Add the "--user-agent" parameter in the wget base command to perform all wget commands with this parameter, because a few HTTP servers block requests with the default wget user agent. For example, "hg.openjdk.org" never sends a response to requests that have been sent with wget: wget https://hg.openjdk.org/jdk8u/jdk8u/archive/jdk8u272-ga.tar.bz2 https://hg.openjdk.org/jdk8u/jdk8u/archive/jdk8u272-ga.tar.bz2 Resolving hg.openjdk.org (hg.openjdk.org)... 23.54.129.73 Connecting to hg.openjdk.org (hg.openjdk.org)|23.54.129.73|:443... connected. HTTP request sent, awaiting response... 
Signed-off-by: Thomas Perrot <thomas.perrot@bootlin.com> --- lib/bb/fetch2/wget.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py index dc025800e659..bb38dd435827 100644 --- a/lib/bb/fetch2/wget.py +++ b/lib/bb/fetch2/wget.py @@ -87,7 +87,8 @@ class Wget(FetchMethod): if not ud.localfile: ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) - self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp" + self.basecmd = d.getVar("FETCHCMD_wget") \ + or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --user-agent='%s'" % (self.user_agent) if not self.check_certs(d): self.basecmd += " --no-check-certificate" @@ -454,7 +455,7 @@ class Wget(FetchMethod): f = tempfile.NamedTemporaryFile() with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: fetchcmd = self.basecmd - fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" + fetchcmd += " -O " + f.name + " '" + uri + "'" try: self._runwget(ud, d, fetchcmd, True, workdir=workdir) fetchresult = f.read() @@ -492,7 +493,7 @@ class Wget(FetchMethod): valid = 1 elif self._vercmp(version, newver) < 0: version = newver - + pupver = re.sub('_', '.', version[1]) bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" % -- 2.43.0
I agree, this patch is over-reaching. It doesn't show that user_agent is actually set to "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0". I'd say servers have every right to send a response tailored for showing in an interactive browser when they see that. I'm sending a revert. The solution needs to be per-recipe. And if some server doesn't like wget, the real solution is opening a ticket with them. Alex On Tue, 20 Feb 2024 at 09:07, Peter Marko via lists.openembedded.org <peter.marko=siemens.com@lists.openembedded.org> wrote: > > Dear maintainers, > > Could we please revert this commit? > I'd propose to enable this via variable on per-recipe basis for those which point their SRC_URI to misbehaving servers. > > Sending browser user-agent causes Jfrog Artifactory to present a GUI as result and not directly the file itself. > I think that in corporate environment using Artifactory for storing binary files is not that uncommon. > And I think that there may be also other servers which try to beautify downloads for browsers. > > Thanks for considering this. > Peter > > -----Original Message----- > From: bitbake-devel@lists.openembedded.org <bitbake-devel@lists.openembedded.org> On Behalf Of Thomas Perrot via lists.openembedded.org > Sent: Friday, February 9, 2024 17:05 > To: bitbake-devel@lists.openembedded.org > Cc: Thomas Perrot <thomas.perrot@bootlin.com> > Subject: [bitbake-devel][PATCH v2] wget.py: always use the custom user agent > > From: Thomas Perrot <thomas.perrot@bootlin.com> > > Add the "--user-agent" paramater in the wget base command to perform all wget commands with this parameter, because a few HTTP servers block requests with the default wget user agent. 
> > For example, "hg.openjdk.org" never send a response to requests have been sent with wget: > wget https://hg.openjdk.org/jdk8u/jdk8u/archive/jdk8u272-ga.tar.bz2 > https://hg.openjdk.org/jdk8u/jdk8u/archive/jdk8u272-ga.tar.bz2 > Resolving hg.openjdk.org (hg.openjdk.org)... 23.54.129.73 Connecting to hg.openjdk.org (hg.openjdk.org)|23.54.129.73|:443... connected. > HTTP request sent, awaiting response... > > Signed-off-by: Thomas Perrot <thomas.perrot@bootlin.com> > --- > lib/bb/fetch2/wget.py | 7 ++++--- > 1 file changed, 4 insertions(+), 3 deletions(-) > > diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py index dc025800e659..bb38dd435827 100644 > --- a/lib/bb/fetch2/wget.py > +++ b/lib/bb/fetch2/wget.py > @@ -87,7 +87,8 @@ class Wget(FetchMethod): > if not ud.localfile: > ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) > > - self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp" > + self.basecmd = d.getVar("FETCHCMD_wget") \ > + or "/usr/bin/env wget -t 2 -T 30 --passive-ftp > + --user-agent='%s'" % (self.user_agent) > > if not self.check_certs(d): > self.basecmd += " --no-check-certificate" > @@ -454,7 +455,7 @@ class Wget(FetchMethod): > f = tempfile.NamedTemporaryFile() > with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: > fetchcmd = self.basecmd > - fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" > + fetchcmd += " -O " + f.name + " '" + uri + "'" > try: > self._runwget(ud, d, fetchcmd, True, workdir=workdir) > fetchresult = f.read() > @@ -492,7 +493,7 @@ class Wget(FetchMethod): > valid = 1 > elif self._vercmp(version, newver) < 0: > version = newver > - > + > pupver = re.sub('_', '.', version[1]) > > bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" % > -- > 2.43.0 > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this 
group. > View/Reply Online (#15945): https://lists.openembedded.org/g/bitbake-devel/message/15945 > Mute This Topic: https://lists.openembedded.org/mt/104261171/1686489 > Group Owner: bitbake-devel+owner@lists.openembedded.org > Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [alex.kanavin@gmail.com] > -=-=-=-=-=-=-=-=-=-=-=- >
I also noticed: WARNING: swig-native-4.2.0-r0 do_fetch: Checksum failure encountered with download of https://downloads.sourceforge.net/swig/swig-4.2.0.tar.gz - will attempt other sources if available And indeed, wget with the firefox user-agent will get you a fancy html page from sourceforge, and not the actual tarball. Try: $ wget --user-agent='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0' https://downloads.sourceforge.net/swig/swig-4.2.0.tar.gz Revert revert revert. Alex On Tue, 20 Feb 2024 at 10:20, Alexander Kanavin via lists.openembedded.org <alex.kanavin=gmail.com@lists.openembedded.org> wrote: > > I agree, this patch is over-reaching. It doesn't show that user_agent > is actually set to "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) > Gecko/20100101 Firefox/84.0". > > I'd say servers have every right to send a response tailored for > showing in an interactive browser when they see that. > > I'm sending a revert. The solution needs to be per-recipe. And if some > server doesn't like wget, the real solution is opening a ticket with > them. > > Alex > > On Tue, 20 Feb 2024 at 09:07, Peter Marko via lists.openembedded.org > <peter.marko=siemens.com@lists.openembedded.org> wrote: > > > > Dear maintainers, > > > > Could we please revert this commit? > > I'd propose to enable this via variable on per-recipe basis for those which point their SRC_URI to misbehaving servers. > > > > Sending browser user-agent causes Jfrog Artifactory to present a GUI as result and not directly the file itself. > > I think that in corporate environment using Artifactory for storing binary files is not that uncommon. > > And I think that there may be also other servers which try to beautify downloads for browsers. > > > > Thanks for considering this. 
> > Peter > > > > -----Original Message----- > > From: bitbake-devel@lists.openembedded.org <bitbake-devel@lists.openembedded.org> On Behalf Of Thomas Perrot via lists.openembedded.org > > Sent: Friday, February 9, 2024 17:05 > > To: bitbake-devel@lists.openembedded.org > > Cc: Thomas Perrot <thomas.perrot@bootlin.com> > > Subject: [bitbake-devel][PATCH v2] wget.py: always use the custom user agent > > > > From: Thomas Perrot <thomas.perrot@bootlin.com> > > > > Add the "--user-agent" paramater in the wget base command to perform all wget commands with this parameter, because a few HTTP servers block requests with the default wget user agent. > > > > For example, "hg.openjdk.org" never send a response to requests have been sent with wget: > > wget https://hg.openjdk.org/jdk8u/jdk8u/archive/jdk8u272-ga.tar.bz2 > > https://hg.openjdk.org/jdk8u/jdk8u/archive/jdk8u272-ga.tar.bz2 > > Resolving hg.openjdk.org (hg.openjdk.org)... 23.54.129.73 Connecting to hg.openjdk.org (hg.openjdk.org)|23.54.129.73|:443... connected. > > HTTP request sent, awaiting response... 
> > > > Signed-off-by: Thomas Perrot <thomas.perrot@bootlin.com> > > --- > > lib/bb/fetch2/wget.py | 7 ++++--- > > 1 file changed, 4 insertions(+), 3 deletions(-) > > > > diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py index dc025800e659..bb38dd435827 100644 > > --- a/lib/bb/fetch2/wget.py > > +++ b/lib/bb/fetch2/wget.py > > @@ -87,7 +87,8 @@ class Wget(FetchMethod): > > if not ud.localfile: > > ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) > > > > - self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp" > > + self.basecmd = d.getVar("FETCHCMD_wget") \ > > + or "/usr/bin/env wget -t 2 -T 30 --passive-ftp > > + --user-agent='%s'" % (self.user_agent) > > > > if not self.check_certs(d): > > self.basecmd += " --no-check-certificate" > > @@ -454,7 +455,7 @@ class Wget(FetchMethod): > > f = tempfile.NamedTemporaryFile() > > with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: > > fetchcmd = self.basecmd > > - fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" > > + fetchcmd += " -O " + f.name + " '" + uri + "'" > > try: > > self._runwget(ud, d, fetchcmd, True, workdir=workdir) > > fetchresult = f.read() > > @@ -492,7 +493,7 @@ class Wget(FetchMethod): > > valid = 1 > > elif self._vercmp(version, newver) < 0: > > version = newver > > - > > + > > pupver = re.sub('_', '.', version[1]) > > > > bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" % > > -- > > 2.43.0 > > > > > > > > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. 
> View/Reply Online (#15946): https://lists.openembedded.org/g/bitbake-devel/message/15946 > Mute This Topic: https://lists.openembedded.org/mt/104261171/1686489 > Group Owner: bitbake-devel+owner@lists.openembedded.org > Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [alex.kanavin@gmail.com] > -=-=-=-=-=-=-=-=-=-=-=- >
diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py index dc025800e659..bb38dd435827 100644 --- a/lib/bb/fetch2/wget.py +++ b/lib/bb/fetch2/wget.py @@ -87,7 +87,8 @@ class Wget(FetchMethod): if not ud.localfile: ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) - self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp" + self.basecmd = d.getVar("FETCHCMD_wget") \ + or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --user-agent='%s'" % (self.user_agent) if not self.check_certs(d): self.basecmd += " --no-check-certificate" @@ -454,7 +455,7 @@ class Wget(FetchMethod): f = tempfile.NamedTemporaryFile() with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: fetchcmd = self.basecmd - fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" + fetchcmd += " -O " + f.name + " '" + uri + "'" try: self._runwget(ud, d, fetchcmd, True, workdir=workdir) fetchresult = f.read() @@ -492,7 +493,7 @@ class Wget(FetchMethod): valid = 1 elif self._vercmp(version, newver) < 0: version = newver - + pupver = re.sub('_', '.', version[1]) bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %