Revision: 2620
Author: janne.t.harkonen
Date: Fri Mar 12 04:32:45 2010
Log: Properly handle entityrefs that are not part of ISO-8859-1
http://code.google.com/p/robotframework/source/detail?r=2620
Modified:
/trunk/src/robot/parsing/htmlreader.py
/trunk/utest/parsing/test_htmlreader.py
=======================================
--- /trunk/src/robot/parsing/htmlreader.py Fri Mar 12 04:32:38 2010
+++ /trunk/src/robot/parsing/htmlreader.py Fri Mar 12 04:32:45 2010
@@ -80,9 +80,12 @@
if extra_entitydefs.has_key(name):
return extra_entitydefs[name]
try:
- return entitydefs[name].decode('ISO-8859-1')
+ value = entitydefs[name]
except KeyError:
return '&'+name+';'
+ if value.startswith('&#'):
+ return unichr(int(value[2:-1]))
+ return value.decode('ISO-8859-1')
def handle_charref(self, number):
value = self._handle_charref(number)
=======================================
--- /trunk/utest/parsing/test_htmlreader.py Fri Mar 12 04:32:38 2010
+++ /trunk/utest/parsing/test_htmlreader.py Fri Mar 12 04:32:45 2010
@@ -131,6 +131,8 @@
('Uuml', u'\u00DC'),
('Aring', u'\u00C5'),
('Ntilde', u'\u00D1'),
+ ('nabla', u'\u2207'),
+ ('ldquo', u'\u201c'),
('invalid', '&invalid;') ]:
self.reader.handle_entityref(inp)
msg = '%s: %r != %r' % (inp, self.response, exp)