diff mbox series

[wrynose,2.18,v2,7/8] fetch2/wget: limit auth on checkstatus redirects

Message ID 73625fd3cb82bd3f9241f77d4b1e9b77fc828860.1781183212.git.yoann.congal@smile.fr
State New
Headers show
Series [wrynose,2.18,v2,1/8] fetch2: reraise IOError during download | expand

Commit Message

Yoann Congal June 11, 2026, 1:11 p.m. UTC
From: Anders Heimer <anders.heimer@est.tech>

FixedHTTPRedirectHandler copies request headers when checkstatus()
follows a redirect, including Authorization from SRC_URI or .netrc.

Keep same-origin redirects unchanged, but drop Authorization and Cookie
for different-origin targets (scheme, host and effective port), following
RFC 9110 redirect guidance for resource-specific headers. This only
affects the Python checkstatus() path; normal wget downloads are
unchanged.

Signed-off-by: Anders Heimer <anders.heimer@est.tech>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
(cherry picked from commit 1019d5a5c42c672ea673ae9d22363d626b57ccb9)
Signed-off-by: Yoann Congal <yoann.congal@smile.fr>
---
 lib/bb/fetch2/wget.py | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py
index 6ac4306c0..fc83b301a 100644
--- a/lib/bb/fetch2/wget.py
+++ b/lib/bb/fetch2/wget.py
@@ -288,6 +288,18 @@  class Wget(FetchMethod):
             http_error_403 = http_error_405
 
 
+        def _url_origin(url):
+            parsed = urllib.parse.urlsplit(url)
+            scheme = parsed.scheme.lower()
+            host = parsed.hostname.lower() if parsed.hostname else ""
+            port = parsed.port
+            if port is None:
+                port = {"http": 80, "https": 443}.get(scheme)
+            return (scheme, host, port)
+
+        def _same_origin(url_a, url_b):
+            return _url_origin(url_a) == _url_origin(url_b)
+
         class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
             """
             urllib2.HTTPRedirectHandler before 3.13 has two flaws:
@@ -301,6 +313,9 @@  class Wget(FetchMethod):
 
             Until we depend on Python 3.13 onwards, copy the redirect_request
             method to fix these issues.
+
+            Additionally, strip sensitive headers (Authorization, Cookie) when
+            redirecting to a different origin to avoid credential leaks.
             """
             def redirect_request(self, req, fp, code, msg, headers, newurl):
                 m = req.get_method()
@@ -320,8 +335,16 @@  class Wget(FetchMethod):
                 newurl = newurl.replace(' ', '%20')
 
                 CONTENT_HEADERS = ("content-length", "content-type")
-                newheaders = {k: v for k, v in req.headers.items()
-                            if k.lower() not in CONTENT_HEADERS}
+                SENSITIVE_REDIRECT_HEADERS = ("authorization", "cookie")
+                same_origin = _same_origin(req.get_full_url(), newurl)
+                newheaders = {}
+                for k, v in req.headers.items():
+                    header = k.lower()
+                    if header in CONTENT_HEADERS:
+                        continue
+                    if not same_origin and header in SENSITIVE_REDIRECT_HEADERS:
+                        continue
+                    newheaders[k] = v
                 return urllib.request.Request(newurl,
                             method="HEAD" if m == "HEAD" else "GET",
                             headers=newheaders,