new file mode 100644
@@ -0,0 +1,344 @@
+From d33bc21414e283c9e6fe7f6caf69e2ed60d66c82 Mon Sep 17 00:00:00 2001
+From: Sam Bull <git@sambull.org>
+Date: Sun, 28 Jan 2024 17:09:58 +0000
+Subject: [PATCH] Improve validation in HTTP parser (#8074) (#8078)
+
+Co-authored-by: Paul J. Dorn <pajod@users.noreply.github.com>
+Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко)
+<sviat@redhat.com>
+(cherry picked from commit 33ccdfb0a12690af5bb49bda2319ec0907fa7827)
+
+CVE: CVE-2024-23829
+
+Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/d33bc21414e283c9e6fe7f6caf69e2ed60d66c82]
+
+Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
+---
+ CONTRIBUTORS.txt | 1 +
+ aiohttp/http_parser.py | 30 ++++----
+ tests/test_http_parser.py | 139 +++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 155 insertions(+), 15 deletions(-)
+
+diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
+index f8a8df5..b9cdf75 100644
+--- a/CONTRIBUTORS.txt
++++ b/CONTRIBUTORS.txt
+@@ -240,6 +240,7 @@ Panagiotis Kolokotronis
+ Pankaj Pandey
+ Pau Freixes
+ Paul Colomiets
++Paul J. Dorn
+ Paulius Šileikis
+ Paulus Schoutsen
+ Pavel Kamaev
+diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
+index 175eb7f..91784b3 100644
+--- a/aiohttp/http_parser.py
++++ b/aiohttp/http_parser.py
+@@ -76,10 +76,11 @@ ASCIISET: Final[Set[str]] = set(string.printable)
+ # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
+ # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
+ # token = 1*tchar
+-METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
+-VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
+-HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
+-HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
++_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
++TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
++VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
++DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
++HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
+
+
+ class RawRequestMessage(NamedTuple):
+@@ -145,6 +146,7 @@ class HeadersParser:
+ self, lines: List[bytes]
+ ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
+ headers: CIMultiDict[str] = CIMultiDict()
++ # note: "raw" does not mean inclusion of OWS before/after the field value
+ raw_headers = []
+
+ lines_idx = 0
+@@ -158,13 +160,14 @@ class HeadersParser:
+ except ValueError:
+ raise InvalidHeader(line) from None
+
++ if len(bname) == 0:
++ raise InvalidHeader(bname)
++
+ # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
+ if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
+ raise InvalidHeader(line)
+
+ bvalue = bvalue.lstrip(b" \t")
+- if HDRRE.search(bname):
+- raise InvalidHeader(bname)
+ if len(bname) > self.max_field_size:
+ raise LineTooLong(
+ "request header name {}".format(
+@@ -173,6 +176,9 @@ class HeadersParser:
+ str(self.max_field_size),
+ str(len(bname)),
+ )
++ name = bname.decode("utf-8", "surrogateescape")
++ if not TOKENRE.fullmatch(name):
++ raise InvalidHeader(bname)
+
+ header_length = len(bvalue)
+
+@@ -220,7 +226,6 @@ class HeadersParser:
+ )
+
+ bvalue = bvalue.strip(b" \t")
+- name = bname.decode("utf-8", "surrogateescape")
+ value = bvalue.decode("utf-8", "surrogateescape")
+
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+@@ -348,7 +353,8 @@ class HttpParser(abc.ABC, Generic[_MsgT]):
+
+ # Shouldn't allow +/- or other number formats.
+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
+- if not length_hdr.strip(" \t").isdecimal():
++ # msg.headers is already stripped of leading/trailing wsp
++ if not DIGITS.fullmatch(length_hdr):
+ raise InvalidHeader(CONTENT_LENGTH)
+
+ return int(length_hdr)
+@@ -582,7 +588,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]):
+ )
+
+ # method
+- if not METHRE.match(method):
++ if not TOKENRE.fullmatch(method):
+ raise BadStatusLine(method)
+
+ # version
+@@ -690,8 +696,8 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
+ raise BadStatusLine(line)
+ version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
+
+- # The status code is a three-digit number
+- if len(status) != 3 or not status.isdecimal():
++ # The status code is a three-digit ASCII number, no padding
++ if len(status) != 3 or not DIGITS.fullmatch(status):
+ raise BadStatusLine(line)
+ status_i = int(status)
+
+@@ -844,7 +850,7 @@ class HttpPayloadParser:
+ if self._lax: # Allow whitespace in lax mode.
+ size_b = size_b.strip()
+
+- if not re.fullmatch(HEXDIGIT, size_b):
++ if not re.fullmatch(HEXDIGITS, size_b):
+ exc = TransferEncodingError(
+ chunk[:pos].decode("ascii", "surrogateescape")
+ )
+diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
+index 4b185c9..bcf6058 100644
+--- a/tests/test_http_parser.py
++++ b/tests/test_http_parser.py
+@@ -2,7 +2,8 @@
+
+ import asyncio
+ import re
+-from typing import Any, List
++from contextlib import nullcontext
++from typing import Any, Dict, List
+ from unittest import mock
+ from urllib.parse import quote
+
+@@ -168,12 +169,28 @@ def test_cve_2023_37276(parser) -> None:
+ parser.feed_data(text)
+
+
++@pytest.mark.parametrize(
++ "rfc9110_5_6_2_token_delim",
++ r'"(),/:;<=>?@[\]{}',
++)
++def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None:
++ text = f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n".encode()
++ expectation = pytest.raises(http_exceptions.BadHttpMessage)
++ if rfc9110_5_6_2_token_delim == ":":
++ # Inserting colon into header just splits name/value earlier.
++ expectation = nullcontext()
++ with expectation:
++ parser.feed_data(text)
++
++
+ @pytest.mark.parametrize(
+ "hdr",
+ (
+ # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
+ "Content-Length: -5",
+ "Content-Length: +256",
++ "Content-Length: \N{superscript one}",
++ "Content-Length: \N{mathematical double-struck digit one}",
+ "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+ "Bar: abc\ndef",
+ "Baz: abc\x00def",
+@@ -240,6 +257,20 @@ def test_whitespace_before_header(parser) -> None:
+ parser.feed_data(text)
+
+
++def test_parse_unusual_request_line(parser) -> None:
++ if not isinstance(response, HttpResponseParserPy):
++ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
++ text = b"#smol //a HTTP/1.3\r\n\r\n"
++ messages, upgrade, tail = parser.feed_data(text)
++ assert len(messages) == 1
++ msg, _ = messages[0]
++ assert msg.compression is None
++ assert not msg.upgrade
++ assert msg.method == "#smol"
++ assert msg.path == "//a"
++ assert msg.version == (1, 3)
++
++
+ def test_parse(parser) -> None:
+ text = b"GET /test HTTP/1.1\r\n\r\n"
+ messages, upgrade, tail = parser.feed_data(text)
+@@ -533,6 +564,43 @@ def test_headers_content_length_err_2(parser) -> None:
+ parser.feed_data(text)
+
+
++_pad: Dict[bytes, str] = {
++ b"": "empty",
++ # not a typo. Python likes triple zero
++ b"\000": "NUL",
++ b" ": "SP",
++ b" ": "SPSP",
++ # not a typo: both 0xa0 and 0x0a in case of 8-bit fun
++ b"\n": "LF",
++ b"\xa0": "NBSP",
++ b"\t ": "TABSP",
++}
++
++
++@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"])
++@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()])
++@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()])
++def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None:
++ text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2)
++ expectation = pytest.raises(http_exceptions.BadHttpMessage)
++ if pad1 == pad2 == b"" and hdr != b"":
++ # one entry in param matrix is correct: non-empty name, not padded
++ expectation = nullcontext()
++ if pad1 == pad2 == hdr == b"":
++ if not isinstance(response, HttpResponseParserPy):
++ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
++ with expectation:
++ parser.feed_data(text)
++
++
++def test_empty_header_name(parser) -> None:
++ if not isinstance(response, HttpResponseParserPy):
++ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
++ text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n"
++ with pytest.raises(http_exceptions.BadHttpMessage):
++ parser.feed_data(text)
++
++
+ def test_invalid_header(parser) -> None:
+ text = b"GET /test HTTP/1.1\r\n" b"test line\r\n\r\n"
+ with pytest.raises(http_exceptions.BadHttpMessage):
+@@ -655,6 +723,34 @@ def test_http_request_bad_status_line(parser) -> None:
+ assert r"\n" not in exc_info.value.message
+
+
++_num: Dict[bytes, str] = {
++ # dangerous: accepted by Python int()
++ # unicodedata.category("\U0001D7D9") == 'Nd'
++ "\N{mathematical double-struck digit one}".encode(): "utf8digit",
++ # only added for interop tests, refused by Python int()
++ # unicodedata.category("\U000000B9") == 'No'
++ "\N{superscript one}".encode(): "utf8number",
++ "\N{superscript one}".encode("latin-1"): "latin1number",
++}
++
++
++@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
++def test_http_request_bad_status_line_number(
++ parser: Any, nonascii_digit: bytes
++) -> None:
++ text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n"
++ with pytest.raises(http_exceptions.BadStatusLine):
++ parser.feed_data(text)
++
++
++def test_http_request_bad_status_line_separator(parser: Any) -> None:
++ # single code point, old, multibyte NFKC, multibyte NFKD
++ utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode()
++ text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n"
++ with pytest.raises(http_exceptions.BadStatusLine):
++ parser.feed_data(text)
++
++
+ def test_http_request_upgrade(parser) -> None:
+ text = (
+ b"GET /test HTTP/1.1\r\n"
+@@ -670,6 +766,31 @@ def test_http_request_upgrade(parser) -> None:
+ assert tail == b"some raw data"
+
+
++def test_http_request_parser_utf8_request_line(parser) -> None:
++ if not isinstance(response, HttpResponseParserPy):
++ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
++ messages, upgrade, tail = parser.feed_data(
++ # note the truncated unicode sequence
++ b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" +
++ # for easier grep: ASCII 0xA0 more commonly known as non-breaking space
++ # note the leading and trailing spaces
++ "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} "
++ "\r\n\r\n".encode()
++ )
++ msg = messages[0][0]
++
++ assert msg.method == "GET"
++ assert msg.path == "/Pünktchen\udca0\udcef\udcb7"
++ assert msg.version == (1, 1)
++ assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")])
++ assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),)
++ assert not msg.should_close
++ assert msg.compression is None
++ assert not msg.upgrade
++ assert not msg.chunked
++ assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path
++
++
+ def test_http_request_parser_utf8(parser) -> None:
+ text = "GET /path HTTP/1.1\r\nx-test:тест\r\n\r\n".encode()
+ messages, upgrade, tail = parser.feed_data(text)
+@@ -719,9 +840,15 @@ def test_http_request_parser_two_slashes(parser) -> None:
+ assert not msg.chunked
+
+
+-def test_http_request_parser_bad_method(parser) -> None:
++@pytest.mark.parametrize(
++ "rfc9110_5_6_2_token_delim",
++ [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'],
++)
++def test_http_request_parser_bad_method(
++ parser, rfc9110_5_6_2_token_delim: bytes
++) -> None:
+ with pytest.raises(http_exceptions.BadStatusLine):
+- parser.feed_data(b'=":<G>(e),[T];?" /get HTTP/1.1\r\n\r\n')
++ parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n')
+
+
+ def test_http_request_parser_bad_version(parser) -> None:
+@@ -907,6 +1034,12 @@ def test_http_response_parser_code_not_int(response) -> None:
+ response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
+
+
++@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
++def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None:
++ with pytest.raises(http_exceptions.BadStatusLine):
++ response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n")
++
++
+ def test_http_request_chunked_payload(parser) -> None:
+ text = b"GET /test HTTP/1.1\r\n" b"transfer-encoding: chunked\r\n\r\n"
+ msg, payload = parser.feed_data(text)[0][0]
+--
+2.40.0
+
@@ -11,6 +11,7 @@ SRC_URI += "file://CVE-2024-23334.patch \
file://CVE-2023-49082.patch \
file://CVE-2024-27306.patch \
file://CVE-2025-53643.patch \
+ file://CVE-2024-23829.patch \
"
SRC_URI[sha256sum] = "b0cf2a4501bff9330a8a5248b4ce951851e415bdcce9dc158e76cfd55e15085c"
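
The hardening in the backported patch boils down to replacing Python's permissive numeric and string checks with ASCII-only regular expressions: DIGITS (r"\d+" with re.ASCII) for Content-Length values and status codes, and TOKENRE (the RFC 9110 tchar set) for request methods and header field names. Below is a minimal standalone sketch of the difference; it only reuses the regex definitions the patch adds to aiohttp/http_parser.py and is not part of the patch or the recipe.

    import re

    # Same definitions the patch adds to aiohttp/http_parser.py.
    _TCHAR_SPECIALS = re.escape("!#$%&'*+-.^_`|~")
    TOKENRE = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
    DIGITS = re.compile(r"\d+", re.ASCII)

    # U+1D7D9 MATHEMATICAL DOUBLE-STRUCK DIGIT ONE is Unicode category Nd, so the
    # old str.isdecimal() check accepts it and int() parses it, even though it is
    # not a valid Content-Length per RFC 9110.
    length_hdr = "\N{MATHEMATICAL DOUBLE-STRUCK DIGIT ONE}23"
    print(length_hdr.isdecimal())        # True  -> old check would let it through
    print(int(length_hdr))               # 123   -> int() normalises the digit
    print(DIGITS.fullmatch(length_hdr))  # None  -> new check rejects it

    # Methods and header names must now be RFC 9110 tokens (tchar only).
    print(bool(TOKENRE.fullmatch("GET")))        # True
    print(bool(TOKENRE.fullmatch('=":<G>(e)')))  # False: delimiters are not tchars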