Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package python-sgmllib3k for
openSUSE:Factory checked in at 2022-02-07 23:38:26
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-sgmllib3k (Old)
and /work/SRC/openSUSE:Factory/.python-sgmllib3k.new.1898 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-sgmllib3k"
Mon Feb 7 23:38:26 2022 rev:4 rq:952243 version:1.0.0
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-sgmllib3k/python-sgmllib3k.changes
2022-02-06 23:56:10.082057134 +0100
+++
/work/SRC/openSUSE:Factory/.python-sgmllib3k.new.1898/python-sgmllib3k.changes
2022-02-07 23:40:00.777655155 +0100
@@ -1,0 +2,7 @@
+Mon Feb 7 20:35:47 UTC 2022 - Matej Cepl <[email protected]>
+
+- Add test_sgmllib.py and sgml_input.html as vendored sources.
+- Add adjust_test_sgmllib.patch to fix the original Python 2.7
+ files to make test suite work (Python 3.10 included).
+
+-------------------------------------------------------------------
New:
----
adjust_test_sgmllib.patch
sgml_input.html
test_sgmllib.py
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-sgmllib3k.spec ++++++
--- /var/tmp/diff_new_pack.yCu3yO/_old 2022-02-07 23:40:01.185652363 +0100
+++ /var/tmp/diff_new_pack.yCu3yO/_new 2022-02-07 23:40:01.193652308 +0100
@@ -1,7 +1,7 @@
#
# spec file for package python-sgmllib3k
#
-# Copyright (c) 2021 SUSE LLC
+# Copyright (c) 2022 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -17,23 +17,25 @@
%define skip_python2 1
-%define skip_python310 1
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-sgmllib3k
Version: 1.0.0
Release: 0
Summary: Python 3 port of sgmllib
-License: Python-2.0 AND BSD-3-Clause
+License: BSD-3-Clause AND Python-2.0
Group: Development/Languages/Python
URL: https://github.com/hsoft/sgmllib
Source:
https://files.pythonhosted.org/packages/source/s/sgmllib3k/sgmllib3k-%{version}.tar.gz
Source1: https://raw.githubusercontent.com/hsoft/sgmllib/master/LICENSE
+Source5: test_sgmllib.py
+Source6: sgml_input.html
+# PATCH-FIX-OPENSUSE adjust_test_sgmllib.patch [email protected]
+# Adjust the vendored Python 2.7 test files so the test suite works on Python 3 (3.10 included)
+Patch0: adjust_test_sgmllib.patch
BuildRequires: %{python_module setuptools}
BuildRequires: %{python_module testsuite}
BuildRequires: fdupes
BuildRequires: python-rpm-macros
-# as a source for the tests!
-BuildRequires: python2-devel
BuildArch: noarch
%python_subpackages
@@ -43,14 +45,18 @@
%prep
%setup -q -n sgmllib3k-%{version}
cp %{SOURCE1} .
+cp %{SOURCE5} .
+cp %{SOURCE6} .
-cp %{_libdir}/python2.7/test/test_sgmllib.py .
-cp %{_libdir}/python2.7/test/sgml_input.html .
-sed -i 's/from test import test_support/from test import support as
test_support/' test_sgmllib.py
+%autopatch -p1
+
+# cp %%{_libdir}/python2.7/test/test_sgmllib.py .
+# cp %%{_libdir}/python2.7/test/sgml_input.html .
+# sed -i 's/from test import test_support/from test import support as
test_support/' test_sgmllib.py
# Disable one test failing with
# UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 2873:
invalid continuation byte
-sed -i 's/test_read_chunks/_test_read_chunks/' test_sgmllib.py
+# sed -i 's/test_read_chunks/_test_read_chunks/' test_sgmllib.py
%build
%python_build
@@ -60,11 +66,12 @@
%python_expand %fdupes %{buildroot}%{$python_sitelib}
%check
-%pyunittest test_sgmllib
+%pyunittest -v test_sgmllib
%files %{python_files}
%doc README
%license LICENSE
-%{python_sitelib}/*
+%{python_sitelib}/sgmllib*
+%pycache_only %{python_sitelib}/__pycache__/sgmllib*.pyc
%changelog
++++++ adjust_test_sgmllib.patch ++++++
---
test_sgmllib.py | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
--- a/test_sgmllib.py
+++ b/test_sgmllib.py
@@ -1,8 +1,9 @@
+import io
import pprint
import re
import unittest
-from test import test_support
-sgmllib = test_support.import_module('sgmllib', deprecated=True)
+import sgmllib
+import sys
class EventCollector(sgmllib.SGMLParser):
@@ -308,6 +309,8 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.
("starttag", "a", []),
])
+ @unittest.skipIf(sys.version_info[:2] >= (3, 10),
+ "_markupbase is stricter in 3.10")
def test_declaration_junk_chars(self):
self.check_parse_error("<!DOCTYPE foo $ >")
@@ -357,13 +360,13 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.
# Just verify this code doesn't cause a hang.
CHUNK = 1024 # increasing this to 8212 makes the problem go away
- f = open(test_support.findfile('sgml_input.html'))
- fp = sgmllib.SGMLParser()
- while 1:
- data = f.read(CHUNK)
- fp.feed(data)
- if len(data) != CHUNK:
- break
+ with io.open('sgml_input.html', 'r', encoding="latin1") as f:
+ fp = sgmllib.SGMLParser()
+ while 1:
+ data = f.read(CHUNK)
+ fp.feed(data)
+ if len(data) != CHUNK:
+ break
def test_only_decode_ascii(self):
# SF bug #1651995, make sure non-ascii character references are not
decoded
@@ -432,7 +435,7 @@ DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.
def test_main():
- test_support.run_unittest(SGMLParserTestCase)
+ unittest.TextTestRunner(verbosity=2).run(unittest.defaultTestLoader.loadTestsFromTestCase(SGMLParserTestCase))
if __name__ == "__main__":
++++++ sgml_input.html ++++++
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
<link rel="stylesheet" type="text/css"
href="http://ogame182.de/epicblue/formate.css">
<script language="JavaScript" src="js/flotten.js"></script>
</head>
<body>
<script language=JavaScript> if (parent.frames.length == 0) {
top.location.href = "http://es.ogame.org/"; } </script> <script
language="JavaScript">
function haha(z1) {
eval("location='"+z1.options[z1.selectedIndex].value+"'");
}
</script>
<center>
<table>
<tr>
<td></td>
<td>
<center>
<table>
<tr>
<td><img
src="http://ogame182.de/epicblue/planeten/small/s_dschjungelplanet04.jpg"
width="50" height="50"></td>
<td>
<table border="1">
<select size="1" onchange="haha(this)">
<option
value="/game/flotten1.php?session=8912ae912fec&cp=33875341&mode=Flotte&gid=&messageziel=&re=0"
selected>Alien sex friend [2:250:6]</option>
<option
value="/game/flotten1.php?session=8912ae912fec&cp=33905100&mode=Flotte&gid=&messageziel=&re=0"
>1989 [2:248:14]</option>
<option
value="/game/flotten1.php?session=8912ae912fec&cp=34570808&mode=Flotte&gid=&messageziel=&re=0"
>1990 [2:248:6]</option>
<option
value="/game/flotten1.php?session=8912ae912fec&cp=34570858&mode=Flotte&gid=&messageziel=&re=0"
>1991 [2:254:6]</option>
<option
value="/game/flotten1.php?session=8912ae912fec&cp=34572929&mode=Flotte&gid=&messageziel=&re=0"
>Colonia [2:253:12]</option>
</select>
</table>
</td>
</tr>
</table>
</center>
</td>
<td>
<table border="0" width="100%" cellspacing="0" cellpadding="0">
<tr>
<td align="center"></td>
<td align="center" width="85">
<img border="0" src="http://ogame182.de/epicblue/images/metall.gif"
width="42" height="22">
</td>
<td align="center" width="85">
<img border="0" src="http://ogame182.de/epicblue/images/kristall.gif"
width="42" height="22">
</td>
<td align="center" width="85">
<img border="0" src="http://ogame182.de/epicblue/images/deuterium.gif"
width="42" height="22">
</td>
<td align="center" width="85">
<img border="0" src="http://ogame182.de/epicblue/images/energie.gif"
width="42" height="22">
</td>
<td align="center"></td>
</tr>
<tr>
<td align="center"><i><b> </b></i></td>
<td align="center" width="85"><i><b><font
color="#ffffff">Metal</font></b></i></td>
<td align="center" width="85"><i><b><font
color="#ffffff">Cristal</font></b></i></td>
<td align="center" width="85"><i><b><font
color="#ffffff">Deuterio</font></b></i></td>
<td align="center" width="85"><i><b><font
color="#ffffff">Energ?a</font></b></i></td>
<td align="center"><i><b> </b></i></td>
</tr>
<tr>
<td align="center"></td>
<td align="center" width="85">160.636</td>
<td align="center" width="85">3.406</td>
<td align="center" width="85">39.230</td>
<td align="center" width="85"><font color=#ff0000>-80</font>/3.965</td>
<td align="center"></td>
</tr>
</table>
</tr>
</table>
</center>
<br />
<script language="JavaScript">
<!--
function link_to_gamepay() {
self.location =
"https://www.gamepay.de/?lang=es&serverID=8&userID=129360&gameID=ogame&gui=v2&chksum=a9751afa9e37e6b1b826356bcca45675";
}
//-->
</script>
<center>
<table width="519" border="0" cellpadding="0" cellspacing="1">
<tr height="20">
<td colspan="8" class="c">Flotas (max. 9)</td>
</tr>
<tr height="20">
<th>Num.</th>
<th>Misi?n</th>
<th>Cantidad</th>
<th>Comienzo</th>
<th>Salida</th>
<th>Objetivo</th>
<th>Llegada</th>
<th>Orden</th>
</tr>
<tr height="20">
<th>1</th>
<th>
<a title="">Espionaje</a>
<a title="Flota en el planeta">(F)</a>
</th>
<th> <a title="Sonda de espionaje: 3
">3</a></th>
<th>[2:250:6]</th>
<th>Wed Aug 9 18:00:02</th>
<th>[2:242:5]</th>
<th>Wed Aug 9 18:01:02</th>
<th>
<form action="flotten1.php?session=8912ae912fec" method="POST">
<input type="hidden" name="order_return" value="25054490" />
<input type="submit" value="Enviar de regreso" />
</form>
</th>
</tr>
<tr height="20">
<th>2</th>
<th>
<a title="">Espionaje</a>
<a title="Volver al planeta">(V)</a>
</th>
<th> <a title="Sonda de espionaje: 3
">3</a></th>
<th>[2:250:6]</th>
<th>Wed Aug 9 17:59:55</th>
<th>[2:242:1]</th>
<th>Wed Aug 9 18:01:55</th>
<th>
</th>
</tr>
</table>
<form action="flotten2.php?session=8912ae912fec" method="POST">
<table width="519" border="0" cellpadding="0" cellspacing="1">
<tr height="20">
<td colspan="4" class="c">Nueva misi?n: elegir naves</td>
</tr>
<tr height="20">
<th>Naves</th>
<th>Disponibles</th>
<!-- <th>Gesch.</th> -->
<th>-</th>
<th>-</th>
</tr>
<tr height="20">
<th><a title="Velocidad: 8500">Nave peque?a de carga</a></th>
<th>10<input type="hidden" name="maxship202" value="10"/></th>
<!-- <th>8500 -->
<input type="hidden" name="consumption202" value="10"/>
<input type="hidden" name="speed202" value="8500" /></th>
<input type="hidden" name="capacity202" value="5000" /></th>
<th><a href="javascript:maxShip('ship202');" >m?x</a> </th>
<th><input name="ship202" size="10" value="0" alt="Nave peque?a de carga
10"/></th>
</tr>
<tr height="20">
<th><a title="Velocidad: 12750">Nave grande de carga</a></th>
<th>19<input type="hidden" name="maxship203" value="19"/></th>
<!-- <th>12750 -->
<input type="hidden" name="consumption203" value="50"/>
<input type="hidden" name="speed203" value="12750" /></th>
<input type="hidden" name="capacity203" value="25000" /></th>
<th><a href="javascript:maxShip('ship203');" >m?x</a> </th>
<th><input name="ship203" size="10" value="0" alt="Nave grande de carga
19"/></th>
</tr>
<tr height="20">
<th><a title="Velocidad: 27000">Crucero</a></th>
<th>6<input type="hidden" name="maxship206" value="6"/></th>
<!-- <th>27000 -->
<input type="hidden" name="consumption206" value="300"/>
<input type="hidden" name="speed206" value="27000" /></th>
<input type="hidden" name="capacity206" value="800" /></th>
<th><a href="javascript:maxShip('ship206');" >m?x</a> </th>
<th><input name="ship206" size="10" value="0" alt="Crucero 6"/></th>
</tr>
<tr height="20">
<th><a title="Velocidad: 3400">Reciclador</a></th>
<th>1<input type="hidden" name="maxship209" value="1"/></th>
<!-- <th>3400 -->
<input type="hidden" name="consumption209" value="300"/>
<input type="hidden" name="speed209" value="3400" /></th>
<input type="hidden" name="capacity209" value="20000" /></th>
<th><a href="javascript:maxShip('ship209');" >m?x</a> </th>
<th><input name="ship209" size="10" value="0" alt="Reciclador 1"/></th>
</tr>
<tr height="20">
<th><a title="Velocidad: 170000000">Sonda de espionaje</a></th>
<th>139<input type="hidden" name="maxship210" value="139"/></th>
<!-- <th>170000000 -->
<input type="hidden" name="consumption210" value="1"/>
<input type="hidden" name="speed210" value="170000000" /></th>
<input type="hidden" name="capacity210" value="5" /></th>
<th><a href="javascript:maxShip('ship210');" >m?x</a> </th>
<th><input name="ship210" size="10" value="0" alt="Sonda de espionaje
139"/></th>
</tr>
<tr height="20">
<th colspan="2"><a href="javascript:noShips();" >Ninguna nave</a></th>
<th colspan="2"><a href="javascript:maxShips();" >Todas las naves</a></th>
</tr>
<tr height="20">
<th colspan="4"><input type="submit" value="Continuar" /></th>
</tr>
<tr><th colspan=4>
<iframe id='a44fb522' name='a44fb522'
src='http://ads.gameforgeads.de/adframe.php?n=a44fb522&what=zone:578'
framespacing='0' frameborder='no' scrolling='no' width='468'
height='60'></iframe>
<br><center></center></br>
</th></tr>
</form>
</table>
</body>
</html>
++++++ test_sgmllib.py ++++++
import pprint
import re
import unittest
from test import test_support
sgmllib = test_support.import_module('sgmllib', deprecated=True)
class EventCollector(sgmllib.SGMLParser):
    """SGML parser that records every callback as a tuple in ``self.events``.

    Each handler appends ``(kind, payload...)`` through ``self.append``,
    which is the bound ``append`` method of the ``self.events`` list.
    """

    def __init__(self):
        self.events = []
        self.append = self.events.append
        sgmllib.SGMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        Normalizing the list this way keeps buffer artefacts from
        separating runs of contiguous characters.  The merged list also
        replaces the content of ``self.events``.
        """
        merged = []
        prev_kind = None
        for event in self.events:
            kind = event[0]
            if kind == prev_kind == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
            prev_kind = kind
        # Update the list in place: ``self.append`` is bound to this very
        # list object, so rebinding ``self.events`` to a new list would
        # leave later callbacks appending to a stale one.
        self.events[:] = merged
        return self.events

    # structure markup

    def unknown_starttag(self, tag, attrs):
        """Record a start tag together with its attribute list."""
        self.append(("starttag", tag, attrs))

    def unknown_endtag(self, tag):
        """Record an end tag."""
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        """Record the text of a comment."""
        self.append(("comment", data))

    def handle_charref(self, data):
        """Record a character reference."""
        self.append(("charref", data))

    def handle_data(self, data):
        """Record a run of character data."""
        self.append(("data", data))

    def handle_decl(self, decl):
        """Record a declaration."""
        self.append(("decl", decl))

    def handle_entityref(self, data):
        """Record an entity reference."""
        self.append(("entityref", data))

    def handle_pi(self, data):
        """Record a processing instruction."""
        self.append(("pi", data))

    def unknown_decl(self, decl):
        """Record a declaration the parser does not recognize."""
        self.append(("unknown decl", decl))
class CDATAEventCollector(EventCollector):
    """Collector that switches to literal mode when a ``cdata`` tag opens."""

    def start_cdata(self, attrs):
        """Record the ``cdata`` start tag, then call ``setliteral()``."""
        event = ("starttag", "cdata", attrs)
        self.append(event)
        self.setliteral()
class HTMLEntityCollector(EventCollector):
    """Collector that also records calls to the ``convert_*`` helpers."""

    entity_or_charref = re.compile('(?:&([a-zA-Z][-.a-zA-Z0-9]*)'
                                   '|&#(x[0-9a-zA-Z]+|[0-9]+))(;?)')

    def convert_charref(self, name):
        """Record the call; delegate unless *name* starts with ``x``."""
        self.append(("charref", "convert", name))
        if name[0] == "x":
            # References starting with "x" are not delegated; the result
            # is None.
            return None
        return EventCollector.convert_charref(self, name)

    def convert_codepoint(self, codepoint):
        """Record the call and invoke the inherited implementation.

        The inherited result is not returned, so this method returns None.
        """
        self.append(("codepoint", "convert", codepoint))
        EventCollector.convert_codepoint(self, codepoint)

    def convert_entityref(self, name):
        """Record the call and return the inherited conversion result."""
        self.append(("entityref", "convert", name))
        return EventCollector.convert_entityref(self, name)

    # The two handlers below record that they were called, then pass the
    # call along to the default sgmllib implementation so that its
    # actions can be recorded as well.

    def handle_charref(self, data):
        """Record the character reference, then run the default handler."""
        self.append(("charref", data))
        sgmllib.SGMLParser.handle_charref(self, data)

    def handle_entityref(self, data):
        """Record the entity reference, then run the default handler."""
        self.append(("entityref", data))
        sgmllib.SGMLParser.handle_entityref(self, data)
class SGMLParserTestCase(unittest.TestCase):
    """Event-based regression tests for ``sgmllib.SGMLParser``.

    Each test feeds markup to a collector parser (``self.collector``,
    ``EventCollector`` unless a test overrides it) and compares the
    recorded events with an expected list.

    NOTE(review): several string literals below (see the individual
    notes) look as if HTML entities in them were decoded in transit;
    they are reproduced exactly as found and should be checked against
    the upstream file.
    """

    collector = EventCollector

    def get_events(self, source):
        """Feed every item of *source* to a fresh collector; return its events.

        *source* is iterated, so a list is fed item by item while a plain
        string is fed one character at a time.
        """
        parser = self.collector()
        for chunk in source:
            parser.feed(chunk)
        parser.close()
        return parser.get_events()

    def check_events(self, source, expected_events):
        """Fail unless parsing *source* yields exactly *expected_events*."""
        events = self.get_events(source)
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def check_parse_error(self, source):
        """Fail unless feeding *source* raises ``sgmllib.SGMLParseError``.

        Always uses a plain ``EventCollector``, regardless of
        ``self.collector``.
        """
        parser = EventCollector()
        try:
            parser.feed(source)
            parser.close()
        except sgmllib.SGMLParseError:
            pass
        else:
            self.fail("expected SGMLParseError for %r\nReceived:\n%s"
                      % (source, pprint.pformat(parser.get_events())))

    def test_doctype_decl_internal(self):
        # The same text is used as input and as expected payload, so the
        # declaration must come back unchanged.
        inside = """\
DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
SYSTEM 'http://www.w3.org/TR/html401/strict.dtd' [
<!ELEMENT html - O EMPTY>
<!ATTLIST html
version CDATA #IMPLIED
profile CDATA 'DublinCore'>
<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
<!ENTITY myEntity 'internal parsed entity'>
<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
<!ENTITY % paramEntity 'name|name|name'>
%paramEntity;
<!-- comment -->
]"""
        self.check_events(["<!%s>" % inside], [
            ("decl", inside),
            ])

    def test_doctype_decl_external(self):
        inside = "DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'"
        # A bare string is passed here, so it is fed character by character.
        self.check_events("<!%s>" % inside, [
            ("decl", inside),
            ])

    def test_underscore_in_attrname(self):
        # SF bug #436621
        """Make sure attribute names with underscores are accepted"""
        self.check_events("<a has_under _under>", [
            ("starttag", "a", [("has_under", "has_under"),
                               ("_under", "_under")]),
            ])

    def test_underscore_in_tagname(self):
        # SF bug #436621
        """Make sure tag names with underscores are accepted"""
        self.check_events("<has_under></has_under>", [
            ("starttag", "has_under", []),
            ("endtag", "has_under"),
            ])

    def test_quotes_in_unquoted_attrs(self):
        # SF bug #436621
        """Be sure quotes in unquoted attributes are made part of the value"""
        self.check_events("<a href=foo'bar\"baz>", [
            ("starttag", "a", [("href", "foo'bar\"baz")]),
            ])

    def test_xhtml_empty_tag(self):
        """Handling of XHTML-style empty start tags"""
        self.check_events("<br />text<i></i>", [
            ("starttag", "br", []),
            ("data", "text"),
            ("starttag", "i", []),
            ("endtag", "i"),
            ])

    def test_processing_instruction_only(self):
        self.check_events("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])

    def test_bad_nesting(self):
        self.check_events("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_bare_ampersands(self):
        self.check_events("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def test_bare_pointy_brackets(self):
        self.check_events("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def test_attr_syntax(self):
        # The same attributes, separated by nothing, spaces, newlines and
        # tabs, must all produce the same start tag event.
        output = [
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
            ]
        self.check_events("""<a b='v' c="v" d=v e>""", output)
        self.check_events("""<a b = 'v' c = "v" d = v e>""", output)
        self.check_events("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self.check_events("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        self.check_events("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                          [("starttag", "a", [("b", "xxx\n\txxx"),
                                              ("c", "yyy\t\nyyy"),
                                              ("d", "\txyz\n")])
                           ])
        self.check_events("""<a b='' c="">""", [
            ("starttag", "a", [("b", ""), ("c", "")]),
            ])
        # URL construction stuff from RFC 1808:
        safe = "$-_.+"
        extra = "!*'(),"
        reserved = ";/?:@&="
        url = "http://example.com:8080/path/to/file?%s%s%s" % (
            safe, extra, reserved)
        self.check_events("""<e a=%s>""" % url, [
            ("starttag", "e", [("a", url)]),
            ])
        # Regression test for SF patch #669683.
        self.check_events("<e a=rgb(1,2,3)>", [
            ("starttag", "e", [("a", "rgb(1,2,3)")]),
            ])

    def test_attr_values_entities(self):
        """Substitution of entities and charrefs in attribute values"""
        # SF bug #1452246
        # NOTE(review): input and expected values look entity-decoded
        # (hardly any entity or character references are left to
        # substitute) -- confirm against the upstream file.
        self.check_events("""<a b=< c=<> d=<-> e='< '
f="&xxx;" g=' !' h='Ǵ'
i='x?a=b&c=d;'
j='&#42;' k='&#42;'>""",
                          [("starttag", "a", [("b", "<"),
                                              ("c", "<>"),
                                              ("d", "<->"),
                                              ("e", "< "),
                                              ("f", "&xxx;"),
                                              ("g", " !"),
                                              ("h", "Ǵ"),
                                              ("i", "x?a=b&c=d;"),
                                              ("j", "*"),
                                              ("k", "*"),
                                              ])])

    def test_convert_overrides(self):
        # This checks that the character and entity reference
        # conversion helpers are called at the documented times. No
        # attempt is made to really change what the parser accepts.
        #
        # NOTE(review): the expected events mention conversions of 'ldquo',
        # 'x201d' and '42', but the input below no longer contains such
        # references -- the literals look entity-decoded; confirm against
        # the upstream file.
        self.collector = HTMLEntityCollector
        self.check_events(('<a title="“test”">foo</a>'
                           '&foobar;*'), [
            ('entityref', 'convert', 'ldquo'),
            ('charref', 'convert', 'x201d'),
            ('starttag', 'a', [('title', '“test”')]),
            ('data', 'foo'),
            ('endtag', 'a'),
            ('entityref', 'foobar'),
            ('entityref', 'convert', 'foobar'),
            ('charref', '42'),
            ('charref', 'convert', '42'),
            ('codepoint', 'convert', 42),
            ])

    def test_attr_funky_names(self):
        self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
            ])

    def test_attr_value_ip6_url(self):
        # http://www.python.org/sf/853506
        self.check_events(("<a href='http://[1080::8:800:200C:417A]/'>"
                           "<a href=http://[1080::8:800:200C:417A]/>"), [
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ("starttag", "a", [("href", "http://[1080::8:800:200C:417A]/")]),
            ])

    def test_weird_starttags(self):
        self.check_events("<a<a>", [
            ("starttag", "a", []),
            ("starttag", "a", []),
            ])
        self.check_events("</a<a>", [
            ("endtag", "a"),
            ("starttag", "a", []),
            ])

    def test_declaration_junk_chars(self):
        self.check_parse_error("<!DOCTYPE foo $ >")

    def test_get_starttag_text(self):
        s = """<foobar \n one="1"\ttwo=2 >"""
        self.check_events(s, [
            ("starttag", "foobar", [("one", "1"), ("two", "2")]),
            ])

    def test_cdata_content(self):
        # NOTE(review): "¬-an-entity-ref;" looks like a decoded
        # "&not-an-entity-ref;" -- confirm against the upstream file.
        s = ("<cdata> <!-- not a comment --> ¬-an-entity-ref; </cdata>"
             "<notcdata> <!-- comment --> </notcdata>")
        self.collector = CDATAEventCollector
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <!-- not a comment --> ¬-an-entity-ref; "),
            ("endtag", "cdata"),
            ("starttag", "notcdata", []),
            ("data", " "),
            ("comment", " comment "),
            ("data", " "),
            ("endtag", "notcdata"),
            ])
        s = """<cdata> <not a='start tag'> </cdata>"""
        self.check_events(s, [
            ("starttag", "cdata", []),
            ("data", " <not a='start tag'> "),
            ("endtag", "cdata"),
            ])

    def test_illegal_declarations(self):
        s = 'abc<!spacer type="block" height="25">def'
        self.check_events(s, [
            ("data", "abc"),
            ("unknown decl", 'spacer type="block" height="25"'),
            ("data", "def"),
            ])

    def test_enumerated_attr_type(self):
        s = "<!DOCTYPE doc [<!ATTLIST doc attr (a | b) >]>"
        self.check_events(s, [
            ('decl', 'DOCTYPE doc [<!ATTLIST doc attr (a | b) >]'),
            ])

    def test_read_chunks(self):
        # SF bug #1541697, this caused sgml parser to hang
        # Just verify this code doesn't cause a hang.
        CHUNK = 1024  # increasing this to 8212 makes the problem go away

        # The context manager closes the fixture file even when feed()
        # raises; the original left the file object open.
        with open(test_support.findfile('sgml_input.html')) as f:
            fp = sgmllib.SGMLParser()
            while True:
                data = f.read(CHUNK)
                fp.feed(data)
                # A short (or empty) read means the end of the file.
                if len(data) != CHUNK:
                    break

    def test_only_decode_ascii(self):
        # SF bug #1651995, make sure non-ascii character references are not
        # decoded
        # NOTE(review): the attribute values below contain literal
        # characters rather than character references -- they look
        # entity-decoded; confirm against the upstream file.
        s = '<signs exclamation="!" copyright="©" quoteleft="‘">'
        self.check_events(s, [
            ('starttag', 'signs',
             [('exclamation', '!'), ('copyright', '©'),
              ('quoteleft', '‘')]),
            ])

    # XXX These tests have been disabled by prefixing their names with
    # an underscore. The first two exercise outstanding bugs in the
    # sgmllib module, and the third exhibits questionable behavior
    # that needs to be carefully considered before changing it.

    def _test_starttag_end_boundary(self):
        self.check_events("<a b='<'>", [("starttag", "a", [("b", "<")])])
        self.check_events("<a b='>'>", [("starttag", "a", [("b", ">")])])

    def _test_buffer_artefacts(self):
        output = [("starttag", "a", [("b", "<")])]
        self.check_events(["<a b='<'>"], output)
        self.check_events(["<a ", "b='<'>"], output)
        self.check_events(["<a b", "='<'>"], output)
        self.check_events(["<a b=", "'<'>"], output)
        self.check_events(["<a b='<", "'>"], output)
        self.check_events(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self.check_events(["<a b='>'>"], output)
        self.check_events(["<a ", "b='>'>"], output)
        self.check_events(["<a b", "='>'>"], output)
        self.check_events(["<a b=", "'>'>"], output)
        self.check_events(["<a b='>", "'>"], output)
        self.check_events(["<a b='>'", ">"], output)

        output = [("comment", "abc")]
        self.check_events(["", "<!--abc-->"], output)
        self.check_events(["<", "!--abc-->"], output)
        self.check_events(["<!", "--abc-->"], output)
        self.check_events(["<!-", "-abc-->"], output)
        self.check_events(["<!--", "abc-->"], output)
        self.check_events(["<!--a", "bc-->"], output)
        self.check_events(["<!--ab", "c-->"], output)
        self.check_events(["<!--abc", "-->"], output)
        self.check_events(["<!--abc-", "->"], output)
        self.check_events(["<!--abc--", ">"], output)
        self.check_events(["<!--abc-->", ""], output)

    def _test_starttag_junk_chars(self):
        self.check_parse_error("<")
        self.check_parse_error("<>")
        self.check_parse_error("</$>")
        self.check_parse_error("</")
        self.check_parse_error("</a")
        self.check_parse_error("<$")
        self.check_parse_error("<$>")
        self.check_parse_error("<!")
        self.check_parse_error("<a $>")
        self.check_parse_error("<a")
        self.check_parse_error("<a foo='bar'")
        self.check_parse_error("<a foo='bar")
        self.check_parse_error("<a foo='>'")
        self.check_parse_error("<a foo='>")
        self.check_parse_error("<a foo=>")
def test_main():
    """Run ``SGMLParserTestCase`` through ``test_support.run_unittest``."""
    case = SGMLParserTestCase
    test_support.run_unittest(case)
# Run the test case when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()