diff mbox series

[1/4] fetch2: add curl method to fetch web content

Message ID 20260305-add_alt_fetch_method_curl-v1-1-0d0220e5fa59@se.com
State New
Headers show
Series fetch2: add alternative fetch method based on curl | expand

Commit Message

Pascal Eberhard via B4 Relay March 5, 2026, 3:32 p.m. UTC
From: Pascal Eberhard <pascal.eberhard@se.com>

curl fetch method is an alternative fetch method for web downloads. It
is based on curl cmdline tool and provides the same http, https, ftp
and ftps protocols as wget. It supports some new features as well such
as hostname resolution by the proxy when using SOCKS5 proxy.

Signed-off-by: Pascal Eberhard <pascal.eberhard@se.com>
---
 lib/bb/fetch2/curl.py | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 162 insertions(+)

Comments

Mathieu Dubois-Briand March 9, 2026, 7:40 a.m. UTC | #1
On Thu Mar 5, 2026 at 4:32 PM CET, Pascal Eberhard via B4 Relay via lists.openembedded.org wrote:
> From: Pascal Eberhard <pascal.eberhard@se.com>
>
> curl fetch method is an alternative fetch method for web downloads. It
> is based on curl cmdline tool and provides the same http, https, ftp
> and ftps protocols as wget. It supports some new features as well such
> as hostname resolution by the proxy when using SOCKS5 proxy.
>
> Signed-off-by: Pascal Eberhard <pascal.eberhard@se.com>
> ---

Hi Pascal,

Thanks for your patches.

> +
> +    def _runcurl(self, ud: FetchData, d: DataSmart, command: str, quiet: bool, workdir: str | None = None):

Writing type hint unions with a pipe was only added in Python 3.10, if
I'm correct [1]. This is above the minimum Python version, 3.9 [2],
mandated for bitbake.

And this is indeed failing on some supported distributions, at least on
Debian 11 and Rocky 9:

Traceback (most recent call last):
  File "/srv/pokybuild/yocto-worker/genericarm64/build/repos/bitbake/bin/bitbake-setup", line 27, in <module>
    import bb.msg
  File "/srv/pokybuild/yocto-worker/genericarm64/build/repos/bitbake/lib/bb/__init__.py", line 155, in <module>
    from bb import fetch2 as fetch
  File "/srv/pokybuild/yocto-worker/genericarm64/build/repos/bitbake/lib/bb/fetch2/__init__.py", line 2107, in <module>
    from . import curl
  File "/srv/pokybuild/yocto-worker/genericarm64/build/repos/bitbake/lib/bb/fetch2/curl.py", line 53, in <module>
    class Curl(Wget):
  File "/srv/pokybuild/yocto-worker/genericarm64/build/repos/bitbake/lib/bb/fetch2/curl.py", line 104, in Curl
    def _runcurl(self, ud: FetchData, d: DataSmart, command: str, quiet: bool, workdir: str | None = None):
TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'

https://autobuilder.yoctoproject.org/valkyrie/#/builders/60/builds/3308
https://autobuilder.yoctoproject.org/valkyrie/#/builders/93/builds/3302

[1]: https://peps.python.org/pep-0604/
[2]: https://git.openembedded.org/bitbake/tree/lib/bb/__init__.py

Thanks,
Mathieu
diff mbox series

Patch

diff --git a/lib/bb/fetch2/curl.py b/lib/bb/fetch2/curl.py
new file mode 100644
index 000000000..250805233
--- /dev/null
+++ b/lib/bb/fetch2/curl.py
@@ -0,0 +1,162 @@ 
+"""
+BitBake 'Fetch' implementations for web downloads based on curl.
+
+The curl fetch method is an alternative to the existing wget method and can be
+enabled by setting the bitbake variable:
+  BB_FETCH_METHOD_HTTP = "curl"
+
+curl fetch method provides new features such as hostname resolution by the
+proxy itself when using a SOCKS5 proxy. Set the proxy with environment variable:
+  all_proxy="socks5h://..."
+"""
+
+# Copyright (C) 2026, Schneider Electric
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Based on the wget fetcher method, Copyright 2003 Holger Schurig
+
+import os
+import re
+import shlex
+import tempfile
+
+import bb
+import bb.fetch2
+import bb.progress
+import bb.utils
+from bb.data_smart import DataSmart
+from bb.fetch2 import FetchData, FetchError, logger, runfetchcmd
+from bb.fetch2.wget import Wget
+
+
+class CurlProgressHandler(bb.progress.LineFilterProgressHandler):
+    """
+    Progress handler that turns curl's progress-bar output into progress updates.
+    It only works when curl is run with the --progress-bar option.
+    """
+
+    def __init__(self, d: DataSmart):
+        super().__init__(d)
+        # Report 0% straight away so that the progress bar is displayed
+        self._fire_progress(0)
+
+    def writeline(self, line: str):
+        """Report the percentage found in line and return False; return True when there is none."""
+        found = re.search(r' ([\d]+)\.\d%', line)
+        if found is None:
+            return True
+        self.update(int(found.group(1)))
+        return False
+
+
+class Curl(Wget):
+    """
+    Class to fetch urls via curl cmdline tool.
+    The code not related to the cmdline is the same between wget and curl.
+    Curl class inherits Wget class to avoid code duplication.
+    """
+
+    def is_enabled(self, d) -> bool:
+        """Return True only when BB_FETCH_METHOD_HTTP is set to "curl"."""
+        method_http: str = d.getVar("BB_FETCH_METHOD_HTTP")
+        return method_http == "curl"
+
+    def supports(self, ud: FetchData, d: DataSmart) -> bool:
+        """Check if a given url can be fetched with curl (method enabled and http/https/ftp/ftps url)."""
+        if not self.is_enabled(d):
+            return False
+        if ud.type not in ['http', 'https', 'ftp', 'ftps']:
+            return False
+        logger.debug2("Fetch method 'curl' enabled")
+        return True
+
+    def urldata_init(self, ud: FetchData, d: DataSmart):
+        """
+        Set ud.basename and ud.localfile, and build the base curl command in self.basecmd.
+        Raises ParameterError when the url carries protocol=git.
+        """
+        if ud.parm.get('protocol') == 'git':
+            raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
+
+        if 'downloadfilename' in ud.parm:
+            ud.basename: str = ud.parm['downloadfilename']
+        else:
+            ud.basename: str = os.path.basename(ud.path)
+
+        ud.localfile = ud.basename
+        if not ud.localfile:
+            ud.localfile = ud.host + ud.path.replace("/", ".")
+
+        # --retry 1: equivalent to --tries=2 of wget.
+        # --speed-limit 1 --speed-time 100 --connect-timeout 100: equivalent to --timeout=100 option of wget.
+        # --location: redo request on new location when a page has moved, indicated with 3xx response code.
+        # --fail: fails with exit code when server generates HTML error rather than writing HTML error to output.
+        self.basecmd: str = d.getVar("FETCHCMD_curl") or "/usr/bin/env curl --retry 1 --speed-limit 1 --speed-time 100 --connect-timeout 100 --location --fail"
+
+        if ud.type in ('ftp', 'ftps'):
+            self.basecmd += " --ftp-pasv"
+
+        if not self.check_certs(d):
+            self.basecmd += " --insecure"
+
+    # The workdir annotation is a string: an evaluated "str | None" raises TypeError before Python 3.10.
+    def _runcurl(self, ud: FetchData, d: DataSmart, command: str, quiet: bool, workdir: "str | None" = None):
+        """Run the curl command line with --progress-bar appended, after the network access check."""
+        progresshandler = CurlProgressHandler(d)
+
+        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
+        bb.fetch2.check_network_access(d, command, ud.url)
+        runfetchcmd(command + " --progress-bar", d, quiet, log=progresshandler, workdir=workdir)
+
+    def download(self, ud: FetchData, d: DataSmart):
+        """Fetch ud.url into DL_DIR via a ".tmp" file; raise FetchError if the result is missing or empty."""
+        fetchcmd: str = self.basecmd
+        dldir: str = os.path.realpath(d.getVar("DL_DIR"))
+        localpath: str = os.path.join(dldir, ud.localfile) + ".tmp"
+        bb.utils.mkdirhier(os.path.dirname(localpath))
+        fetchcmd += " --output %s" % shlex.quote(localpath)
+
+        if ud.user and ud.pswd:
+            fetchcmd += " --anyauth"
+            if ud.parm.get("redirectauth", "1") == "1":
+                # curl takes option values as a separate word (no --user=VALUE form); quote the credentials for the shell
+                fetchcmd += " --user %s" % shlex.quote(f"{ud.user}:{ud.pswd}")
+
+        uri: str = ud.url.split(";")[0]
+        fetchcmd += " --continue-at - %s" % shlex.quote(uri)
+
+        self._runcurl(ud, d, fetchcmd, False)
+
+        # Sanity check since curl can pretend it succeeded when it didn't
+        # Also, this used to happen if sourceforge sent us to the mirror page
+        if not os.path.exists(localpath):
+            raise FetchError(f"The fetch command returned success for url {uri} but {localpath} doesn't exist?!", uri)
+
+        if os.path.getsize(localpath) == 0:
+            os.remove(localpath)
+            raise FetchError(f"The fetch of {uri} resulted in a zero size file?! Deleting and failing since this isn't right.", uri)
+
+        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
+        # original file, which might be a race (imagine two recipes referencing the same
+        # source, one with an incorrect checksum)
+        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
+
+        # Remove the ".tmp" and move the file into position atomically
+        # Our lock prevents multiple writers but mirroring code may grab incomplete files
+        os.rename(localpath, localpath[:-4])
+
+        return True
+
+    def _fetch_index(self, uri: str, ud: FetchData, d: DataSmart):
+        """Run fetch checkstatus to get directory information; returns "" if the fetch raises a BBFetchException."""
+        with tempfile.TemporaryDirectory(prefix="curl-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="curl-listing-") as f:
+            fetchcmd: str = self.basecmd
+            fetchcmd += " --output %s %s" % (shlex.quote(f.name), shlex.quote(uri))
+            try:
+                self._runcurl(ud, d, fetchcmd, True, workdir=workdir)
+                fetchresult = f.read()
+            except bb.fetch2.BBFetchException:
+                fetchresult = ""
+
+        return fetchresult