From patchwork Thu Mar 5 15:32:14 2026 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pascal Eberhard via B4 Relay X-Patchwork-Id: 82589 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id 69274F33A9F for ; Thu, 5 Mar 2026 15:32:28 +0000 (UTC) Received: from tor.source.kernel.org (tor.source.kernel.org [172.105.4.254]) by mx.groups.io with SMTP id smtpd.msgproc02-g2.46976.1772724741187335646 for ; Thu, 05 Mar 2026 07:32:21 -0800 Authentication-Results: mx.groups.io; dkim=pass header.i=@kernel.org header.s=k20201202 header.b=VqOjhjTz; spf=pass (domain: kernel.org, ip: 172.105.4.254, mailfrom: devnull+pascal.eberhard.se.com@kernel.org) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by tor.source.kernel.org (Postfix) with ESMTP id 38A2D61340; Thu, 5 Mar 2026 15:32:20 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPS id E0AADC2BC87; Thu, 5 Mar 2026 15:32:19 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1772724739; bh=FxrKfoS+ixJt0y0yayYTPGhZyuEtE5qtqcYMNHhGq18=; h=From:Date:Subject:References:In-Reply-To:To:Cc:Reply-To:From; b=VqOjhjTzCbDUGbey+C4sL0XA4jdgQ3vtotP0xn/OKuPhkbDAo6HCDqUEzLq8i3kLn HSNhZMwTLQF0txuB1nNSV3jRpyA/PrAxw4KCOHn3xWrH2AG4GBVroTmem4z7y2w7Gr 2gN2br1T3yKKg0Nmhri1pEwFOIHZxBGzhPQgGOCsWGwnt5Rclo945i0dloZaPwg2zl J0cKu3OFQrDWUyxWRy85BLcQmX4+SwqijJHRQzGLMMHqp+wnn8J0s3UxxNGnaPwU19 YH9Yl+tVn5cPccQ8iFa+ii0Sd/WQP4daJ667/iFlt9Vk7f7KKGk2TgpfoBOdV9ZbeX vfwr6g/lC8sew== Received: from aws-us-west-2-korg-lkml-1.web.codeaurora.org (localhost.localdomain [127.0.0.1]) by smtp.lore.kernel.org (Postfix) with ESMTP id D4748F33A92; Thu, 5 Mar 2026 15:32:19 +0000 (UTC) From: Pascal Eberhard via B4 Relay Date: Thu, 
05 Mar 2026 16:32:14 +0100 Subject: [PATCH 1/4] fetch2: add curl method to fetch web content MIME-Version: 1.0 Message-Id: <20260305-add_alt_fetch_method_curl-v1-1-0d0220e5fa59@se.com> References: <20260305-add_alt_fetch_method_curl-v1-0-0d0220e5fa59@se.com> In-Reply-To: <20260305-add_alt_fetch_method_curl-v1-0-0d0220e5fa59@se.com> To: bitbake-devel@lists.openembedded.org Cc: Pascal Eberhard X-Mailer: b4 0.13.0 X-Developer-Signature: v=1; a=ed25519-sha256; t=1772724738; l=7328; i=pascal.eberhard@se.com; s=20260304; h=from:subject:message-id; bh=6SdmvTHBkNPWmwuu/UBdWIerEVPkSkIo+7UJlD9w77s=; b=j4HciAgx+kjH00XWyHDxf8jX10qr3t5I/VhKpxPszu61E+LLvA5KZGjvnBjd8G3KJUgVy24qr Tz6u9/7BDE0CUAJgzRBDKPhMev+QAroEh20hafX31n1TC1cKgA+XnLn X-Developer-Key: i=pascal.eberhard@se.com; a=ed25519; pk=J7TbFctjt1RVuN5K0juhf/w1E9bAfSeoQ/JG1vV9mWg= X-Endpoint-Received: by B4 Relay for pascal.eberhard@se.com/20260304 with auth_id=661 X-Original-From: Pascal Eberhard Reply-To: pascal.eberhard@se.com List-Id: X-Webhook-Received: from 45-33-107-173.ip.linodeusercontent.com [45.33.107.173] by aws-us-west-2-korg-lkml-1.web.codeaurora.org with HTTPS for ; Thu, 05 Mar 2026 15:32:28 -0000 X-Groupsio-URL: https://lists.openembedded.org/g/bitbake-devel/message/19113 From: Pascal Eberhard curl fetch method is an alternative fetch method for web downloads. It is based on curl cmdline tool and provides the same http, https, ftp and ftps protocols as wget. It supports some new features as well such as hostname resolution by the proxy when using SOCKS5 proxy. Signed-off-by: Pascal Eberhard --- lib/bb/fetch2/curl.py | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/lib/bb/fetch2/curl.py b/lib/bb/fetch2/curl.py new file mode 100644 index 000000000..250805233 --- /dev/null +++ b/lib/bb/fetch2/curl.py @@ -0,0 +1,162 @@ +""" +BitBake 'Fetch' implementations for web downloads based on curl. 
"""
BitBake 'Fetch' implementations for web downloads based on curl.

curl fetch method is an alternative to the existing wget method and can be
enabled by setting the bitbake variable:
    BB_FETCH_METHOD_HTTP = "curl"

curl fetch method provides new features such as hostname resolution by the
proxy itself when using a SOCKS5 proxy. It can be set with the environment
variable:
    all_proxy="socks5h://..."
"""

# Copyright (C) 2026, Schneider Electric
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on the wget fetcher method, Copyright 2003 Holger Schurig

import os
import re
import shlex
import tempfile

import bb
import bb.fetch2
import bb.progress
import bb.utils
from bb.data_smart import DataSmart
from bb.fetch2 import FetchData, FetchError, logger, runfetchcmd
from bb.fetch2.wget import Wget


class CurlProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from curl commandline output.

    Note: relies on --progress-bar being specified on the curl command line,
    which makes curl emit lines containing a " NN.N%" percentage.
    """

    def __init__(self, d: DataSmart):
        super(CurlProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line: str):
        """
        Process one line of curl output.

        Returns False (consume the line) when it carried a percentage that was
        forwarded as a progress update, True (pass it through) otherwise.
        """
        matches = re.findall(r' ([\d]+)\.\d%', line)
        if matches:
            progress = int(matches[0])
            self.update(progress)
            return False
        return True


class Curl(Wget):
    """
    Class to fetch urls via curl cmdline tool.
    The code not related to the cmdline is the same between wget and curl.
    Curl class inherits Wget class to avoid code duplication.
    """

    def is_enabled(self, d: DataSmart) -> bool:
        """
        curl method is enabled when BB_FETCH_METHOD_HTTP = "curl" only.
        """
        method_http: str = d.getVar("BB_FETCH_METHOD_HTTP")
        return method_http == "curl"

    def supports(self, ud: FetchData, d: DataSmart) -> bool:
        """
        Check if a given url can be fetched with curl.
        """
        if not self.is_enabled(d):
            return False
        if ud.type not in ['http', 'https', 'ftp', 'ftps']:
            return False
        logger.debug2("Fetch method 'curl' enabled")
        return True

    def urldata_init(self, ud: FetchData, d: DataSmart):
        """
        Initialise per-url data (local file name) and build the base curl
        command line for this url.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = ud.basename
        if not ud.localfile:
            # No path component in the url (e.g. "http://host/"); derive a
            # file name from the host so we still have a cacheable name.
            ud.localfile = ud.host + ud.path.replace("/", ".")

        # --retry 1: equivalent to --tries=2 of wget.
        # --speed-limit 1 --speed-time 100 --connect-timeout 100: equivalent to --timeout=100 option of wget.
        # --location: redo request on new location when a page has moved, indicated with 3xx response code.
        # --fail: fails with exit code when server generates HTML error rather than writing HTML error to output.
        # NOTE(review): basecmd is stored on the (shared) fetch method
        # instance rather than on ud, mirroring the wget fetcher.
        self.basecmd: str = d.getVar("FETCHCMD_curl") or "/usr/bin/env curl --retry 1 --speed-limit 1 --speed-time 100 --connect-timeout 100 --location --fail"

        if ud.type == 'ftp' or ud.type == 'ftps':
            self.basecmd += " --ftp-pasv"

        if not self.check_certs(d):
            self.basecmd += " --insecure"

    def _runcurl(self, ud: FetchData, d: DataSmart, command: str, quiet: bool, workdir: str | None = None):
        """
        Run a curl command after the network access check, wiring curl's
        --progress-bar output into the bitbake progress reporting.
        """
        progresshandler = CurlProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + " --progress-bar", d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud: FetchData, d: DataSmart):
        """Fetch urls"""
        fetchcmd: str = self.basecmd
        dldir: str = os.path.realpath(d.getVar("DL_DIR"))
        # Download to a ".tmp" file first so nothing ever sees a partially
        # written file under the final name.
        localpath: str = os.path.join(dldir, ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
        fetchcmd += " --output %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --anyauth"
            if ud.parm.get("redirectauth", "1") == "1":
                # Quote the credentials: runfetchcmd runs this through a
                # shell and user/password may contain shell metacharacters.
                fetchcmd += " --user %s" % shlex.quote(f"{ud.user}:{ud.pswd}")

        # Drop any ";param=value" bitbake url parameters before handing the
        # uri to curl; quote it since it is expanded in a shell command.
        uri: str = ud.url.split(";")[0]
        fetchcmd += " --continue-at - %s" % shlex.quote(uri)

        self._runcurl(ud, d, fetchcmd, False)

        # Sanity check since curl can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(localpath):
            raise FetchError(f"The fetch command returned success for url {uri} but {localpath} doesn't exist?!", uri)

        if os.path.getsize(localpath) == 0:
            os.remove(localpath)
            raise FetchError(f"The fetch of {uri} resulted in a zero size file?! Deleting and failing since this isn't right.", uri)

        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)

        # Remove the ".tmp" and move the file into position atomically
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])

        return True

    def _fetch_index(self, uri: str, ud: FetchData, d: DataSmart):
        """
        Run fetch checkstatus to get directory information.

        Returns the fetched listing content, or "" when the fetch failed.
        """
        with tempfile.TemporaryDirectory(prefix="curl-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="curl-listing-") as f:
            fetchcmd: str = self.basecmd
            # Quote both the output path and the uri: the command is run
            # through a shell by runfetchcmd.
            fetchcmd += " --output %s %s" % (shlex.quote(f.name), shlex.quote(uri))
            try:
                self._runcurl(ud, d, fetchcmd, True, workdir=workdir)
                # NOTE(review): the temp file is opened in binary mode, so
                # this yields bytes on success but str ("") on failure —
                # mirrors the wget fetcher; confirm callers handle both.
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult