From 239e938e5c7d898b19255fa1907f4e18cf4a0237 Mon Sep 17 00:00:00 2001 From: wency Date: Wed, 3 Dec 2025 16:31:44 +0800 Subject: [PATCH 1/2] test --- fix-CVE-2025-6069.patch | 3983 +++++++++++++++++++++++++++++++++++++++ python3.spec | 8 +- 2 files changed, 3989 insertions(+), 2 deletions(-) create mode 100644 fix-CVE-2025-6069.patch diff --git a/fix-CVE-2025-6069.patch b/fix-CVE-2025-6069.patch new file mode 100644 index 0000000..6583ab6 --- /dev/null +++ b/fix-CVE-2025-6069.patch @@ -0,0 +1,3983 @@ + + + + + +fix-CVE-2025-6069.patch · src-anolis-os/python3 - Gitee.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+ + + + +
+ +
+
+
+ + +
+ + + +
+
+
+ + +
+
+ +
+
+
+ + + + + + + + + +8 + + + +Star +0 + + + +Fork +41 + +
+

+ src-anolis-os/python3 + + +

+
+
+
+ + + + + + + + +
+
+
+
+
+加入 Gitee +
+
+与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :) +
+免费加入 + +
+
+ + +
+ +
+
+ +
+
+
+
+ +
+文件 +
+
+
+
+ + + + + +
+ + + +
+ +
+
+ + + + +
+
+
+ + + + + + + + +
+
+
+ + + + + +
+
+
+
+
+
+
+该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。 + + +
+ +
+
+ +
+ +
+
+ +
+
+克隆/下载 + +
+ + + + +
+
+
+
+ + + + + +
+ + + +
+ +
+
+ + + + +
+ + +
+ + + + + + +
+
+
+
+ +
+
+ + +fix-CVE-2025-6069.patch + +10.19 KB +
+
+ +一键复制 +编辑 +原始数据 +按行查看 +历史 +
+ + + +
+
+
+
+
+wency +提交于 + +2025-08-13 13:46 +08:00 + +. +Add patch to fix CVE-2025-6069 + +
+
+
+
+
+
From 089c6aa56d4e826ef67a492be4b832764273a937 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Jun 2025 19:57:48 +0300
Subject: [PATCH] [3.11] gh-135462: Fix quadratic complexity in processing
special input in HTMLParser (GH-135464)
End-of-file errors are now handled according to the HTML5 specs --
comments and declarations are automatically closed, tags are ignored.
(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/html/parser.py | 41 +++++---
Lib/test/test_htmlparser.py | 95 ++++++++++++++++---
...-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 +
3 files changed, 117 insertions(+), 23 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index bef0f4fe4bf776..9c38008bbfd06b 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -25,6 +25,7 @@
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
@@ -176,7 +177,7 @@ def goahead(self, end):
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
- elif (i + 1) < n:
+ elif (i + 1) < n or end:
self.handle_data("<")
k = i + 1
else:
@@ -184,17 +185,35 @@ def goahead(self, end):
if k < 0:
if not end:
break
- k = rawdata.find('>', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
- else:
- k += 1
- if self.convert_charrefs and not self.cdata_elem:
- self.handle_data(unescape(rawdata[i:k]))
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("</", i):
+ if i + 2 == n:
+ self.handle_data("</")
+ elif endtagopen.match(rawdata, i): # </ + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<!--", i):
+ j = n
+ for suffix in ("--!", "--", "-"):
+ if rawdata.endswith(suffix, i+4):
+ j -= len(suffix)
+ break
+ self.handle_comment(rawdata[i+4:j])
+ elif startswith("<![CDATA[", i):
+ self.unknown_decl(rawdata[i+3:])
+ elif rawdata[i:i+9].lower() == '<!doctype':
+ self.handle_decl(rawdata[i+2:])
+ elif startswith("<!", i):
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<?", i):
+ self.handle_pi(rawdata[i+2:])
else:
- self.handle_data(rawdata[i:k])
+ raise AssertionError("we should not get here!")
+ k = n
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 12917755a56017..df775c11310146 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -4,6 +4,8 @@
import pprint
import unittest
+from test import support
+
class EventCollector(html.parser.HTMLParser):
@@ -391,28 +393,34 @@ def test_tolerant_parsing(self):
('data', '<'),
('starttag', 'bc<', [('a', None)]),
('endtag', 'html'),
- ('data', '\n<img src="URL>'),
- ('comment', '/img'),
- ('endtag', 'html<')])
+ ('data', '\n')])
def test_starttag_junk_chars(self):
+ self._run_check("<", [('data', '<')])
+ self._run_check("<>", [('data', '<>')])
+ self._run_check("< >", [('data', '< >')])
+ self._run_check("< ", [('data', '< ')])
self._run_check("</>", [])
+ self._run_check("<$>", [('data', '<$>')])
self._run_check("</$>", [('comment', '$')])
self._run_check("</", [('data', '</')])
- self._run_check("</a", [('data', '</a')])
+ self._run_check("</a", [])
+ self._run_check("</ a>", [('endtag', 'a')])
+ self._run_check("</ a", [('comment', ' a')])
self._run_check("<a<a>", [('starttag', 'a<a', [])])
self._run_check("</a<a>", [('endtag', 'a<a')])
- self._run_check("<!", [('data', '<!')])
- self._run_check("<a", [('data', '<a')])
- self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
- self._run_check("<a foo='bar", [('data', "<a foo='bar")])
- self._run_check("<a foo='>'", [('data', "<a foo='>'")])
- self._run_check("<a foo='>", [('data', "<a foo='>")])
+ self._run_check("<!", [('comment', '')])
+ self._run_check("<a", [])
+ self._run_check("<a foo='bar'", [])
+ self._run_check("<a foo='bar", [])
+ self._run_check("<a foo='>'", [])
+ self._run_check("<a foo='>", [])
self._run_check("<a$>", [('starttag', 'a$', [])])
self._run_check("<a$b>", [('starttag', 'a$b', [])])
self._run_check("<a$b/>", [('startendtag', 'a$b', [])])
self._run_check("<a$b >", [('starttag', 'a$b', [])])
self._run_check("<a$b />", [('startendtag', 'a$b', [])])
+ self._run_check("</a$b>", [('endtag', 'a$b')])
def test_slashes_in_starttag(self):
self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
@@ -537,13 +545,56 @@ def test_EOF_in_charref(self):
for html, expected in data:
self._run_check(html, expected)
- def test_broken_comments(self):
- html = ('<! not really a comment >'
+ def test_eof_in_comments(self):
+ data = [
+ ('<!--', [('comment', '')]),
+ ('<!---', [('comment', '')]),
+ ('<!----', [('comment', '')]),
+ ('<!-----', [('comment', '-')]),
+ ('<!------', [('comment', '--')]),
+ ('<!----!', [('comment', '')]),
+ ('<!---!', [('comment', '-!')]),
+ ('<!---!>', [('comment', '-!>')]),
+ ('<!--foo', [('comment', 'foo')]),
+ ('<!--foo-', [('comment', 'foo')]),
+ ('<!--foo--', [('comment', 'foo')]),
+ ('<!--foo--!', [('comment', 'foo')]),
+ ('<!--<!--', [('comment', '<!')]),
+ ('<!--<!--!', [('comment', '<!')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
+ def test_eof_in_declarations(self):
+ data = [
+ ('<!', [('comment', '')]),
+ ('<!-', [('comment', '-')]),
+ ('<![', [('comment', '[')]),
+ ('<![CDATA[', [('unknown decl', 'CDATA[')]),
+ ('<![CDATA[x', [('unknown decl', 'CDATA[x')]),
+ ('<![CDATA[x]', [('unknown decl', 'CDATA[x]')]),
+ ('<![CDATA[x]]', [('unknown decl', 'CDATA[x]]')]),
+ ('<!DOCTYPE', [('decl', 'DOCTYPE')]),
+ ('<!DOCTYPE ', [('decl', 'DOCTYPE ')]),
+ ('<!DOCTYPE html', [('decl', 'DOCTYPE html')]),
+ ('<!DOCTYPE html ', [('decl', 'DOCTYPE html ')]),
+ ('<!DOCTYPE html PUBLIC', [('decl', 'DOCTYPE html PUBLIC')]),
+ ('<!DOCTYPE html PUBLIC "foo', [('decl', 'DOCTYPE html PUBLIC "foo')]),
+ ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo',
+ [('decl', 'DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
+ def test_bogus_comments(self):
+ html = ('<!ELEMENT br EMPTY>'
+ '<! not really a comment >'
'<! not a comment either -->'
'<! -- close enough -->'
'<!><!<-- this was an empty comment>'
'<!!! another bogus comment !!!>')
expected = [
+ ('comment', 'ELEMENT br EMPTY'),
('comment', ' not really a comment '),
('comment', ' not a comment either --'),
('comment', ' -- close enough --'),
@@ -598,6 +649,26 @@ def test_convert_charrefs_dropped_text(self):
('endtag', 'a'), ('data', ' bar & baz')]
)
+ @support.requires_resource('cpu')
+ def test_eof_no_quadratic_complexity(self):
+ # Each of these examples used to take about an hour.
+ # Now they take a fraction of a second.
+ def check(source):
+ parser = html.parser.HTMLParser()
+ parser.feed(source)
+ parser.close()
+ n = 120_000
+ check("<a " * n)
+ check("<a a=" * n)
+ check("</a " * 14 * n)
+ check("</a a=" * 11 * n)
+ check("<!--" * 4 * n)
+ check("<!" * 60 * n)
+ check("<?" * 19 * n)
+ check("</$" * 15 * n)
+ check("<![CDATA[" * 9 * n)
+ check("<!doctype" * 35 * n)
+
class AttributesTestCase(TestCaseBase):
diff --git a/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst b/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
new file mode 100644
index 00000000000000..cf9aa8dbdf2efe
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
@@ -0,0 +1,4 @@
+Fix quadratic complexity in processing specially crafted input in
+:class:`html.parser.HTMLParser`. End-of-file errors are now handled according
+to the HTML5 specs -- comments and declarations are automatically closed,
+tags are ignored.
+
+ + +
+
+
+
Loading...
+
+
+
+ + + +
+ + + + + + + +
+ + +
+
+
+ +
+
+
+ +马建仓 AI 助手 +
+
+ +
+
+
+
+
+
+
+
+
+
尝试更多
+
+
代码解读
+
+
+
代码找茬
+
+
+
代码优化
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + +
+
+
+ + + + + + +
+ + + + +
+
+
1
+
https://gitee.com/src-anolis-os/python3.git
+
git@gitee.com:src-anolis-os/python3.git
+
src-anolis-os
+
python3
+
python3
+
cd762fafb1e37a6a832e8f4666436d3252f84371
+
+ + + + + + + + + + + + + +
+
+ +
+ + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + diff --git a/python3.spec b/python3.spec index a4b74d6..a0a92f2 100644 --- a/python3.spec +++ b/python3.spec @@ -1,4 +1,4 @@ -%define anolis_release 7 +%define anolis_release 8 %global pybasever 3.11 # pybasever without the dot: @@ -249,7 +249,8 @@ Patch1007: fix-CVE-2024-0397.patch Patch1008: fix-CVE-2025-4516.patch # https://github.com/python/cpython/commit/7040aa54f14676938970e10c5f74ea93cd56aa38 Patch1009: fix-CVE-2025-8194.patch - +# https://github.com/python/cpython/pull/135484 +Patch1010: fix-CVE-2025-6069.patch # ========================================== # Descriptions, and metadata for subpackages # ========================================== @@ -1521,6 +1522,9 @@ CheckPython optimized # ====================================================== %changelog +* Wed Aug 13 2025 qiyue - 3.11.6-8 +- Add patch to fix CVE-2025-6069 + * Wed Aug 06 2025 wenxin - 3.11.6-7 - Add patch to fix CVE-2025-8194 -- Gitee From 14117f3dcc80fbbe6ddd8931b3ce8cef9939a6d1 Mon Sep 17 00:00:00 2001 From: wency Date: Wed, 3 Dec 2025 16:33:53 +0800 Subject: [PATCH 2/2] up' --- fix-CVE-2025-6069.patch | 4221 +++------------------------------------ 1 file changed, 239 insertions(+), 3982 deletions(-) diff --git a/fix-CVE-2025-6069.patch b/fix-CVE-2025-6069.patch index 6583ab6..f35ee06 100644 --- a/fix-CVE-2025-6069.patch +++ b/fix-CVE-2025-6069.patch @@ -1,3983 +1,240 @@ - - - - +From 089c6aa56d4e826ef67a492be4b832764273a937 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Fri, 13 Jun 2025 19:57:48 +0300 +Subject: [PATCH] [3.11] gh-135462: Fix quadratic complexity in processing + special input in HTMLParser (GH-135464) + +End-of-file errors are now handled according to the HTML5 specs -- +comments and declarations are automatically closed, tags are ignored. +(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41) + +Co-authored-by: Serhiy Storchaka +--- + Lib/html/parser.py | 41 +++++--- + Lib/test/test_htmlparser.py | 95 ++++++++++++++++--- + ...-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 + + 3 files changed, 117 insertions(+), 23 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst + +diff --git a/Lib/html/parser.py b/Lib/html/parser.py +index bef0f4fe4bf776..9c38008bbfd06b 100644 +--- a/Lib/html/parser.py ++++ b/Lib/html/parser.py +@@ -25,6 +25,7 @@ + charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') + + starttagopen = re.compile('<[a-zA-Z]') ++endtagopen = re.compile('') + commentclose = re.compile(r'--\s*>') + # Note: +@@ -176,7 +177,7 @@ def goahead(self, end): + k = self.parse_pi(i) + elif startswith("', i + 1) +- if k < 0: +- k = rawdata.find('<', i + 1) +- if k < 0: +- k = i + 1 +- else: +- k += 1 +- if self.convert_charrefs and not self.cdata_elem: +- self.handle_data(unescape(rawdata[i:k])) ++ if starttagopen.match(rawdata, i): # < + letter ++ pass ++ elif startswith("'), +- ('comment', '/img'), +- ('endtag', 'html<')]) ++ ('data', '\n')]) + + def test_starttag_junk_chars(self): ++ self._run_check("<", [('data', '<')]) ++ self._run_check("<>", [('data', '<>')]) ++ self._run_check("< >", [('data', '< >')]) ++ self._run_check("< ", [('data', '< ')]) + self._run_check("", []) ++ self._run_check("<$>", [('data', '<$>')]) + self._run_check("", [('comment', '$')]) + self._run_check("", [('endtag', 'a')]) ++ self._run_check("", [('starttag', 'a", [('endtag', 'a'", [('data', "'", []) ++ self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) + self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) ++ self._run_check("", [('endtag', 'a$b')]) + + def test_slashes_in_starttag(self): + self._run_check('', [('startendtag', 'a', [('foo', 'var')])]) +@@ -537,13 +545,56 @@ def test_EOF_in_charref(self): + for html, expected in data: + self._run_check(html, expected) + +- def test_broken_comments(self): +- html = ('' ++ def test_eof_in_comments(self): ++ data = [ ++ ('', [('comment', '-!>')]), ++ ('' + '' + '' + '') + expected = [ ++ ('comment', 'ELEMENT br EMPTY'), + ('comment', ' not really a comment '), + ('comment', ' not a comment either --'), + ('comment', ' -- close enough --'), +@@ -598,6 +649,26 @@ def test_convert_charrefs_dropped_text(self): + ('endtag', 'a'), ('data', ' bar & baz')] + ) + ++ @support.requires_resource('cpu') ++ def test_eof_no_quadratic_complexity(self): ++ # Each of these examples used to take about an hour. ++ # Now they take a fraction of a second. ++ def check(source): ++ parser = html.parser.HTMLParser() ++ parser.feed(source) ++ parser.close() ++ n = 120_000 ++ check("fix-CVE-2025-6069.patch · src-anolis-os/python3 - Gitee.com - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
- -
-
- - - - -
- -
-
-
- - -
- - - -
-
-
- - -
-
- -
-
-
- - - - - - - - - -8 - - - -Star -0 - - - -Fork -41 - -
-

- src-anolis-os/python3 - - -

-
-
-
- - - - - - - - -
-
-
-
-
-加入 Gitee -
-
-与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :) -
-免费加入 - -
-
- - -
- -
-
- -
-
-
-
- -
-文件 -
-
-
-
- - - - - -
- - - -
- -
-
- - - - -
-
-
- - - - - - - - -
-
-
- - - - - -
-
-
-
-
-
-
-该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。 - - -
- -
-
- -
- -
-
- -
-
-克隆/下载 - -
- - - - -
-
-
-
- - - - - -
- - - -
- -
-
- - - - -
- - -
- - - - - - -
-
-
-
- -
-
- - -fix-CVE-2025-6069.patch - -10.19 KB -
-
- -一键复制 -编辑 -原始数据 -按行查看 -历史 -
- - - -
-
-
-
-
-wency -提交于 - -2025-08-13 13:46 +08:00 - -. -Add patch to fix CVE-2025-6069 - -
-
-
-
-
-
From 089c6aa56d4e826ef67a492be4b832764273a937 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Fri, 13 Jun 2025 19:57:48 +0300
Subject: [PATCH] [3.11] gh-135462: Fix quadratic complexity in processing
special input in HTMLParser (GH-135464)
End-of-file errors are now handled according to the HTML5 specs --
comments and declarations are automatically closed, tags are ignored.
(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
Lib/html/parser.py | 41 +++++---
Lib/test/test_htmlparser.py | 95 ++++++++++++++++---
...-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 +
3 files changed, 117 insertions(+), 23 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index bef0f4fe4bf776..9c38008bbfd06b 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -25,6 +25,7 @@
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
+endtagopen = re.compile('</[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
# Note:
@@ -176,7 +177,7 @@ def goahead(self, end):
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
- elif (i + 1) < n:
+ elif (i + 1) < n or end:
self.handle_data("<")
k = i + 1
else:
@@ -184,17 +185,35 @@ def goahead(self, end):
if k < 0:
if not end:
break
- k = rawdata.find('>', i + 1)
- if k < 0:
- k = rawdata.find('<', i + 1)
- if k < 0:
- k = i + 1
- else:
- k += 1
- if self.convert_charrefs and not self.cdata_elem:
- self.handle_data(unescape(rawdata[i:k]))
+ if starttagopen.match(rawdata, i): # < + letter
+ pass
+ elif startswith("</", i):
+ if i + 2 == n:
+ self.handle_data("</")
+ elif endtagopen.match(rawdata, i): # </ + letter
+ pass
+ else:
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<!--", i):
+ j = n
+ for suffix in ("--!", "--", "-"):
+ if rawdata.endswith(suffix, i+4):
+ j -= len(suffix)
+ break
+ self.handle_comment(rawdata[i+4:j])
+ elif startswith("<![CDATA[", i):
+ self.unknown_decl(rawdata[i+3:])
+ elif rawdata[i:i+9].lower() == '<!doctype':
+ self.handle_decl(rawdata[i+2:])
+ elif startswith("<!", i):
+ # bogus comment
+ self.handle_comment(rawdata[i+2:])
+ elif startswith("<?", i):
+ self.handle_pi(rawdata[i+2:])
else:
- self.handle_data(rawdata[i:k])
+ raise AssertionError("we should not get here!")
+ k = n
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 12917755a56017..df775c11310146 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -4,6 +4,8 @@
import pprint
import unittest
+from test import support
+
class EventCollector(html.parser.HTMLParser):
@@ -391,28 +393,34 @@ def test_tolerant_parsing(self):
('data', '<'),
('starttag', 'bc<', [('a', None)]),
('endtag', 'html'),
- ('data', '\n<img src="URL>'),
- ('comment', '/img'),
- ('endtag', 'html<')])
+ ('data', '\n')])
def test_starttag_junk_chars(self):
+ self._run_check("<", [('data', '<')])
+ self._run_check("<>", [('data', '<>')])
+ self._run_check("< >", [('data', '< >')])
+ self._run_check("< ", [('data', '< ')])
self._run_check("</>", [])
+ self._run_check("<$>", [('data', '<$>')])
self._run_check("</$>", [('comment', '$')])
self._run_check("</", [('data', '</')])
- self._run_check("</a", [('data', '</a')])
+ self._run_check("</a", [])
+ self._run_check("</ a>", [('endtag', 'a')])
+ self._run_check("</ a", [('comment', ' a')])
self._run_check("<a<a>", [('starttag', 'a<a', [])])
self._run_check("</a<a>", [('endtag', 'a<a')])
- self._run_check("<!", [('data', '<!')])
- self._run_check("<a", [('data', '<a')])
- self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
- self._run_check("<a foo='bar", [('data', "<a foo='bar")])
- self._run_check("<a foo='>'", [('data', "<a foo='>'")])
- self._run_check("<a foo='>", [('data', "<a foo='>")])
+ self._run_check("<!", [('comment', '')])
+ self._run_check("<a", [])
+ self._run_check("<a foo='bar'", [])
+ self._run_check("<a foo='bar", [])
+ self._run_check("<a foo='>'", [])
+ self._run_check("<a foo='>", [])
self._run_check("<a$>", [('starttag', 'a$', [])])
self._run_check("<a$b>", [('starttag', 'a$b', [])])
self._run_check("<a$b/>", [('startendtag', 'a$b', [])])
self._run_check("<a$b >", [('starttag', 'a$b', [])])
self._run_check("<a$b />", [('startendtag', 'a$b', [])])
+ self._run_check("</a$b>", [('endtag', 'a$b')])
def test_slashes_in_starttag(self):
self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
@@ -537,13 +545,56 @@ def test_EOF_in_charref(self):
for html, expected in data:
self._run_check(html, expected)
- def test_broken_comments(self):
- html = ('<! not really a comment >'
+ def test_eof_in_comments(self):
+ data = [
+ ('<!--', [('comment', '')]),
+ ('<!---', [('comment', '')]),
+ ('<!----', [('comment', '')]),
+ ('<!-----', [('comment', '-')]),
+ ('<!------', [('comment', '--')]),
+ ('<!----!', [('comment', '')]),
+ ('<!---!', [('comment', '-!')]),
+ ('<!---!>', [('comment', '-!>')]),
+ ('<!--foo', [('comment', 'foo')]),
+ ('<!--foo-', [('comment', 'foo')]),
+ ('<!--foo--', [('comment', 'foo')]),
+ ('<!--foo--!', [('comment', 'foo')]),
+ ('<!--<!--', [('comment', '<!')]),
+ ('<!--<!--!', [('comment', '<!')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
+ def test_eof_in_declarations(self):
+ data = [
+ ('<!', [('comment', '')]),
+ ('<!-', [('comment', '-')]),
+ ('<![', [('comment', '[')]),
+ ('<![CDATA[', [('unknown decl', 'CDATA[')]),
+ ('<![CDATA[x', [('unknown decl', 'CDATA[x')]),
+ ('<![CDATA[x]', [('unknown decl', 'CDATA[x]')]),
+ ('<![CDATA[x]]', [('unknown decl', 'CDATA[x]]')]),
+ ('<!DOCTYPE', [('decl', 'DOCTYPE')]),
+ ('<!DOCTYPE ', [('decl', 'DOCTYPE ')]),
+ ('<!DOCTYPE html', [('decl', 'DOCTYPE html')]),
+ ('<!DOCTYPE html ', [('decl', 'DOCTYPE html ')]),
+ ('<!DOCTYPE html PUBLIC', [('decl', 'DOCTYPE html PUBLIC')]),
+ ('<!DOCTYPE html PUBLIC "foo', [('decl', 'DOCTYPE html PUBLIC "foo')]),
+ ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo',
+ [('decl', 'DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "foo')]),
+ ]
+ for html, expected in data:
+ self._run_check(html, expected)
+
+ def test_bogus_comments(self):
+ html = ('<!ELEMENT br EMPTY>'
+ '<! not really a comment >'
'<! not a comment either -->'
'<! -- close enough -->'
'<!><!<-- this was an empty comment>'
'<!!! another bogus comment !!!>')
expected = [
+ ('comment', 'ELEMENT br EMPTY'),
('comment', ' not really a comment '),
('comment', ' not a comment either --'),
('comment', ' -- close enough --'),
@@ -598,6 +649,26 @@ def test_convert_charrefs_dropped_text(self):
('endtag', 'a'), ('data', ' bar & baz')]
)
+ @support.requires_resource('cpu')
+ def test_eof_no_quadratic_complexity(self):
+ # Each of these examples used to take about an hour.
+ # Now they take a fraction of a second.
+ def check(source):
+ parser = html.parser.HTMLParser()
+ parser.feed(source)
+ parser.close()
+ n = 120_000
+ check("<a " * n)
+ check("<a a=" * n)
+ check("</a " * 14 * n)
+ check("</a a=" * 11 * n)
+ check("<!--" * 4 * n)
+ check("<!" * 60 * n)
+ check("<?" * 19 * n)
+ check("</$" * 15 * n)
+ check("<![CDATA[" * 9 * n)
+ check("<!doctype" * 35 * n)
+
class AttributesTestCase(TestCaseBase):
diff --git a/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst b/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
new file mode 100644
index 00000000000000..cf9aa8dbdf2efe
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst
@@ -0,0 +1,4 @@
+Fix quadratic complexity in processing specially crafted input in
+:class:`html.parser.HTMLParser`. End-of-file errors are now handled according
+to the HTML5 specs -- comments and declarations are automatically closed,
+tags are ignored.
-
- - -
-
-
-
Loading...
-
-
-
- - - -
- - - - - - - -
- - -
-
-
- -
-
-
- -马建仓 AI 助手 -
-
- -
-
-
-
-
-
-
-
-
-
尝试更多
-
-
代码解读
-
-
-
代码找茬
-
-
-
代码优化
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- - - - -
-
-
- - - - - - -
- - - - -
-
-
1
-
https://gitee.com/src-anolis-os/python3.git
-
git@gitee.com:src-anolis-os/python3.git
-
src-anolis-os
-
python3
-
python3
-
cd762fafb1e37a6a832e8f4666436d3252f84371
-
- - - - - - - - - - - - - -
-
- -
- - - - - - - - - - - - -
- - - - - - - - - - - - - - - - - - - - -- Gitee