diff mbox series

[v2,1/2] fetch2/wget: handle HTTP 308 Permanent Redirect

Message ID 20241203200507.2957152-1-ross.burton@arm.com
State New
Headers show
Series [v2,1/2] fetch2/wget: handle HTTP 308 Permanent Redirect | expand

Commit Message

Ross Burton Dec. 3, 2024, 8:05 p.m. UTC
urllib2.HTTPRedirectHandler.redirect_request doesn't handle HTTP reponse
code 308 (Permanent Redirect). This was fixed in c379bc5 but can't be
worked around without copying the entire redirect_request() method.

When we can depend on Python 3.13, FixedHTTPRedirectHandler can be
removed.

Signed-off-by: Ross Burton <ross.burton@arm.com>
---
 bitbake/lib/bb/fetch2/wget.py | 42 ++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index 773d41ca81a..fcb71246d9a 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -305,13 +305,45 @@  class Wget(FetchMethod):
 
         class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
             """
-            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
-            when we want to follow redirects using the original method.
+            urllib2.HTTPRedirectHandler before 3.13 has two flaws:
+            
+            It resets the method to GET on redirect when we want to follow
+            redirects using the original method (typically HEAD). This was fixed
+            in 759e8e7.
+
+            It also doesn't handle 308 (Permanent Redirect). This was fixed in
+            c379bc5.
+
+            Until we depend on Python 3.13 onwards, copy the redirect_request
+            method to fix these issues.
             """
             def redirect_request(self, req, fp, code, msg, headers, newurl):
-                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
-                newreq.get_method = req.get_method
-                return newreq
+                m = req.get_method()
+                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
+                    or code in (301, 302, 303) and m == "POST")):
+                    raise urllib.HTTPError(req.full_url, code, msg, headers, fp)
+
+                # Strictly (according to RFC 2616), 301 or 302 in response to
+                # a POST MUST NOT cause a redirection without confirmation
+                # from the user (of urllib.request, in this case).  In practice,
+                # essentially all clients do redirect in this case, so we do
+                # the same.
+
+                # Be conciliant with URIs containing a space.  This is mainly
+                # redundant with the more complete encoding done in http_error_302(),
+                # but it is kept for compatibility with other callers.
+                newurl = newurl.replace(' ', '%20')
+
+                CONTENT_HEADERS = ("content-length", "content-type")
+                newheaders = {k: v for k, v in req.headers.items()
+                            if k.lower() not in CONTENT_HEADERS}
+                return urllib.request.Request(newurl,
+                            method="HEAD" if m == "HEAD" else "GET",
+                            headers=newheaders,
+                            origin_req_host=req.origin_req_host,
+                            unverifiable=True)
+
+            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
 
         # We need to update the environment here as both the proxy and HTTPS
         # handlers need variables set. The proxy needs http_proxy and friends to