@@ -11,6 +11,7 @@
#
import email
+import patchtest_patterns
import re
# From: https://stackoverflow.com/questions/59681461/read-a-big-mbox-file-with-python
@@ -32,7 +33,7 @@ class MboxReader:
lines = []
while True:
line = self.handle.readline()
- if line == b'' or line.startswith(b'From '):
+ if line == b'' or patchtest_patterns.mbox_from.match(line):
yield email.message_from_bytes(b''.join(lines))
if line == b'':
break
@@ -61,6 +61,10 @@ mbox_cover_letter_regex = pyparsing.Regex(r'\[\S+\s+0+/\d+\]')
mbox_shortlog_maxlength = 90
# based on https://stackoverflow.com/questions/30281026/regex-parsing-github-usernames-javascript
mbox_github_username = pyparsing.Regex(r'\B(?<!\${)@([a-z0-9](?:-(?=[a-z0-9])|[a-z0-9]){0,38}(?<=[a-z0-9]))')
+# Standard mbox From_ separator line: "From <addr> Www Mmm [D]D HH:MM:SS YYYY"
+mbox_from = re.compile(
+ rb'^From \S+ \w{3} \w{3} [ \d]\d \d{2}:\d{2}:\d{2} \d{4}'
+)
# patch
A patch's commit message may include a line that begins with 'From', for example to indicate a range or to note where a change's inspiration came from. The current patchtest logic automatically splits the mbox on any line that looks this way, which results in an error being thrown due to missing context in the diff list. Add a new regex pattern matching the typical 'From' line format in patch files so that patchtest splits on the correct text.

AI-Generated: Uses Claude Code
Signed-off-by: Trevor Gamblin <tgamblin@baylibre.com>
---
 meta/lib/patchtest/mbox.py               | 3 ++-
 meta/lib/patchtest/patchtest_patterns.py | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)