| Message ID | 20260312223619.3735215-1-richard.purdie@linuxfoundation.org |
|---|---|
| State | New |
| Headers | show |
| Series | fetch/wget: Improve connection error handling |
On Thu Mar 12, 2026 at 11:36 PM CET, Richard Purdie via lists.openembedded.org wrote: > We see occasional connection errors in wget testing of sstate mirrors. > > It appears there is a case where http.client.RemoteDisconnected is > returned against getrepsonse() which the current code doesn't handle > well. > > Rather than trying to handle very specific error cases, catch any errors > and drop the cached connection in those cases in the do_open() code. > > Similarly, try again, once in the case of errors in all cases rather > than trying to handle a specific exception list. > > The traceback from the logs is included below for reference. > > Traceback (most recent call last): > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 415, in checkstatus > with opener.open(r, timeout=100) as response: > ^^^^^^^^^^^^^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/urllib/request.py", line 519, in open > response = self._open(req, data) > ^^^^^^^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/urllib/request.py", line 536, in _open > result = self._call_chain(self.handle_open, protocol, protocol + > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/urllib/request.py", line 496, in _call_chain > result = func(*args) > ^^^^^^^^^^^ > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 178, in http_open > return self.do_open(HTTPConnectionCache, req) > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 249, in do_open > r = h.getresponse() > ^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/http/client.py", line 1374, in getresponse > response.begin() > File "/usr/lib/python3.11/http/client.py", line 318, in begin > version, status, reason = self._read_status() > ^^^^^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/http/client.py", line 287, in _read_status > raise RemoteDisconnected("Remote end closed 
connection without" > http.client.RemoteDisconnected: Remote end closed connection without response > > During handling of the above exception, another exception occurred: > > Traceback (most recent call last): > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 228, in do_open > h.request(req.get_method(), req.selector, req.data, headers) > File "/usr/lib/python3.11/http/client.py", line 1282, in request > self._send_request(method, url, body, headers, encode_chunked) > File "/usr/lib/python3.11/http/client.py", line 1328, in _send_request > self.endheaders(body, encode_chunked=encode_chunked) > File "/usr/lib/python3.11/http/client.py", line 1277, in endheaders > self._send_output(message_body, encode_chunked=encode_chunked) > File "/usr/lib/python3.11/http/client.py", line 1037, in _send_output > self.send(msg) > File "/usr/lib/python3.11/http/client.py", line 998, in send > self.sock.sendall(data) > OSError: [Errno 9] Bad file descriptor > > During handling of the above exception, another exception occurred: > > Traceback (most recent call last): > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 415, in checkstatus > with opener.open(r, timeout=100) as response: > ^^^^^^^^^^^^^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/urllib/request.py", line 519, in open > response = self._open(req, data) > ^^^^^^^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/urllib/request.py", line 536, in _open > result = self._call_chain(self.handle_open, protocol, protocol + > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > File "/usr/lib/python3.11/urllib/request.py", line 496, in _call_chain > result = func(*args) > ^^^^^^^^^^^ > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 178, in http_open > return self.do_open(HTTPConnectionCache, req) > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > File 
"/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 246, in do_open > raise urllib.error.URLError(err) > > Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org> > --- Could that be the fix for bug #15945? AB-INT: fetch/sstate/CDN - Missing objects in the cache: Bad file descriptor https://bugzilla.yoctoproject.org/show_bug.cgi?id=15945 > lib/bb/fetch2/wget.py | 41 +++++++++++++++-------------------------- > 1 file changed, 15 insertions(+), 26 deletions(-) > > diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py > index 4e3505599b4..a9d432b25d3 100644 > --- a/lib/bb/fetch2/wget.py > +++ b/lib/bb/fetch2/wget.py > @@ -216,31 +216,20 @@ class Wget(FetchMethod): > > try: > h.request(req.get_method(), req.selector, req.data, headers) > - except socket.error as err: # XXX what error? > - # Don't close connection when cache is enabled. > - # Instead, try to detect connections that are no longer > - # usable (for example, closed unexpectedly) and remove > - # them from the cache. > - if fetch.connection_cache is None: > - h.close() > - elif isinstance(err, OSError) and err.errno == errno.EBADF: > - # This happens when the server closes the connection despite the Keep-Alive. > - # Apparently urllib then uses the file descriptor, expecting it to be > - # connected, when in reality the connection is already gone. > - # We let the request fail and expect it to be > - # tried once more ("try_again" in check_status()), > - # with the dead connection removed from the cache. > - # If it still fails, we give up, which can happen for bad > - # HTTP proxy settings. > + r = h.getresponse() > + except: > + # This can happen when the server closes the connection despite the Keep-Alive. > + # Apparently urllib then uses the file descriptor, expecting it to be > + # connected, when in reality the connection is already gone. 
> + # We let the request fail and expect it to be > + # tried once more ("try_again" in check_status()), > + # with the dead connection removed from the cache. > + # If it still fails, we give up, which can happen for bad > + # HTTP proxy settings. > + if fetch.connection_cache: > fetch.connection_cache.remove_connection(h.host, h.port) > - raise urllib.error.URLError(err) > - else: > - try: > - r = h.getresponse() > - except TimeoutError as e: > - if fetch.connection_cache: > - fetch.connection_cache.remove_connection(h.host, h.port) > - raise TimeoutError(e) > + h.close() > + raise > > # Pick apart the HTTPResponse object to get the addinfourl > # object initialized properly. > @@ -404,9 +393,9 @@ class Wget(FetchMethod): > > with opener.open(r, timeout=100) as response: > pass > - except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: > + except Exception as e: > if try_again: > - logger.debug2("checkstatus: trying again") > + logger.debug2("checkstatus: trying again after exception %s" % str(e)) > return self.checkstatus(fetch, ud, d, False) > else: > # debug for now to avoid spamming the logs in e.g. remote sstate searches
On Thu, 2026-03-12 at 23:49 +0100, Yoann Congal wrote: > On Thu Mar 12, 2026 at 11:36 PM CET, Richard Purdie via lists.openembedded.org wrote: > > We see occasional connection errors in wget testing of sstate mirrors. > > > > It appears there is a case where http.client.RemoteDisconnected is > > returned against getrepsonse() which the current code doesn't handle > > well. > > > > Rather than trying to handle very specific error cases, catch any errors > > and drop the cached connection in those cases in the do_open() code. > > > > Similarly, try again, once in the case of errors in all cases rather > > than trying to handle a specific exception list. > > > > The traceback from the logs is included below for reference. > > > > Traceback (most recent call last): > > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 415, in checkstatus > > with opener.open(r, timeout=100) as response: > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/urllib/request.py", line 519, in open > > response = self._open(req, data) > > ^^^^^^^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/urllib/request.py", line 536, in _open > > result = self._call_chain(self.handle_open, protocol, protocol + > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/urllib/request.py", line 496, in _call_chain > > result = func(*args) > > ^^^^^^^^^^^ > > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 178, in http_open > > return self.do_open(HTTPConnectionCache, req) > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 249, in do_open > > r = h.getresponse() > > ^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/http/client.py", line 1374, in getresponse > > response.begin() > > File "/usr/lib/python3.11/http/client.py", line 318, in begin > > version, status, reason = self._read_status() > > 
^^^^^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/http/client.py", line 287, in _read_status > > raise RemoteDisconnected("Remote end closed connection without" > > http.client.RemoteDisconnected: Remote end closed connection without response > > > > During handling of the above exception, another exception occurred: > > > > Traceback (most recent call last): > > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 228, in do_open > > h.request(req.get_method(), req.selector, req.data, headers) > > File "/usr/lib/python3.11/http/client.py", line 1282, in request > > self._send_request(method, url, body, headers, encode_chunked) > > File "/usr/lib/python3.11/http/client.py", line 1328, in _send_request > > self.endheaders(body, encode_chunked=encode_chunked) > > File "/usr/lib/python3.11/http/client.py", line 1277, in endheaders > > self._send_output(message_body, encode_chunked=encode_chunked) > > File "/usr/lib/python3.11/http/client.py", line 1037, in _send_output > > self.send(msg) > > File "/usr/lib/python3.11/http/client.py", line 998, in send > > self.sock.sendall(data) > > OSError: [Errno 9] Bad file descriptor > > > > During handling of the above exception, another exception occurred: > > > > Traceback (most recent call last): > > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 415, in checkstatus > > with opener.open(r, timeout=100) as response: > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/urllib/request.py", line 519, in open > > response = self._open(req, data) > > ^^^^^^^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/urllib/request.py", line 536, in _open > > result = self._call_chain(self.handle_open, protocol, protocol + > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > File "/usr/lib/python3.11/urllib/request.py", line 496, in _call_chain > > result = func(*args) > > ^^^^^^^^^^^ > > File 
"/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 178, in http_open > > return self.do_open(HTTPConnectionCache, req) > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 246, in do_open > > raise urllib.error.URLError(err) > > > > Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org> > > --- > > Could that be the fix for bug #15945? > AB-INT: fetch/sstate/CDN - Missing objects in the cache: Bad file descriptor > https://bugzilla.yoctoproject.org/show_bug.cgi?id=15945 I'm hoping so! I forgot to put the bug number in here but this is aiming to fix that. Cheers, Richard
diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py index 4e3505599b4..a9d432b25d3 100644 --- a/lib/bb/fetch2/wget.py +++ b/lib/bb/fetch2/wget.py @@ -216,31 +216,20 @@ class Wget(FetchMethod): try: h.request(req.get_method(), req.selector, req.data, headers) - except socket.error as err: # XXX what error? - # Don't close connection when cache is enabled. - # Instead, try to detect connections that are no longer - # usable (for example, closed unexpectedly) and remove - # them from the cache. - if fetch.connection_cache is None: - h.close() - elif isinstance(err, OSError) and err.errno == errno.EBADF: - # This happens when the server closes the connection despite the Keep-Alive. - # Apparently urllib then uses the file descriptor, expecting it to be - # connected, when in reality the connection is already gone. - # We let the request fail and expect it to be - # tried once more ("try_again" in check_status()), - # with the dead connection removed from the cache. - # If it still fails, we give up, which can happen for bad - # HTTP proxy settings. + r = h.getresponse() + except: + # This can happen when the server closes the connection despite the Keep-Alive. + # Apparently urllib then uses the file descriptor, expecting it to be + # connected, when in reality the connection is already gone. + # We let the request fail and expect it to be + # tried once more ("try_again" in check_status()), + # with the dead connection removed from the cache. + # If it still fails, we give up, which can happen for bad + # HTTP proxy settings. + if fetch.connection_cache: fetch.connection_cache.remove_connection(h.host, h.port) - raise urllib.error.URLError(err) - else: - try: - r = h.getresponse() - except TimeoutError as e: - if fetch.connection_cache: - fetch.connection_cache.remove_connection(h.host, h.port) - raise TimeoutError(e) + h.close() + raise # Pick apart the HTTPResponse object to get the addinfourl # object initialized properly. 
@@ -404,9 +393,9 @@ class Wget(FetchMethod): with opener.open(r, timeout=100) as response: pass - except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: + except Exception as e: if try_again: - logger.debug2("checkstatus: trying again") + logger.debug2("checkstatus: trying again after exception %s" % str(e)) return self.checkstatus(fetch, ud, d, False) else: # debug for now to avoid spamming the logs in e.g. remote sstate searches
We see occasional connection errors in wget testing of sstate mirrors. It appears there is a case where http.client.RemoteDisconnected is raised by getresponse(), which the current code doesn't handle well. Rather than trying to handle very specific error cases, catch any errors and drop the cached connection in those cases in the do_open() code. Similarly, in checkstatus(), try again once after any error rather than trying to handle a specific exception list. The traceback from the logs is included below for reference. Traceback (most recent call last): File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 415, in checkstatus with opener.open(r, timeout=100) as response: ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/urllib/request.py", line 519, in open response = self._open(req, data) ^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/urllib/request.py", line 536, in _open result = self._call_chain(self.handle_open, protocol, protocol + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/urllib/request.py", line 496, in _call_chain result = func(*args) ^^^^^^^^^^^ File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 178, in http_open return self.do_open(HTTPConnectionCache, req) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 249, in do_open r = h.getresponse() ^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/http/client.py", line 1374, in getresponse response.begin() File "/usr/lib/python3.11/http/client.py", line 318, in begin version, status, reason = self._read_status() ^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/http/client.py", line 287, in _read_status raise RemoteDisconnected("Remote end closed connection without" http.client.RemoteDisconnected: Remote end closed connection without response During handling of the above exception, another exception occurred: 
Traceback (most recent call last): File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 228, in do_open h.request(req.get_method(), req.selector, req.data, headers) File "/usr/lib/python3.11/http/client.py", line 1282, in request self._send_request(method, url, body, headers, encode_chunked) File "/usr/lib/python3.11/http/client.py", line 1328, in _send_request self.endheaders(body, encode_chunked=encode_chunked) File "/usr/lib/python3.11/http/client.py", line 1277, in endheaders self._send_output(message_body, encode_chunked=encode_chunked) File "/usr/lib/python3.11/http/client.py", line 1037, in _send_output self.send(msg) File "/usr/lib/python3.11/http/client.py", line 998, in send self.sock.sendall(data) OSError: [Errno 9] Bad file descriptor During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 415, in checkstatus with opener.open(r, timeout=100) as response: ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/urllib/request.py", line 519, in open response = self._open(req, data) ^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/urllib/request.py", line 536, in _open result = self._call_chain(self.handle_open, protocol, protocol + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/usr/lib/python3.11/urllib/request.py", line 496, in _call_chain result = func(*args) ^^^^^^^^^^^ File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 178, in http_open return self.do_open(HTTPConnectionCache, req) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/srv/pokybuild/yocto-worker/a-full/build/layers/bitbake/lib/bb/fetch2/wget.py", line 246, in do_open raise urllib.error.URLError(err) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org> --- lib/bb/fetch2/wget.py | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 
26 deletions(-)