Revision: 2620
Author: janne.t.harkonen
Date: Fri Mar 12 04:32:45 2010
Log: Properly handle entity references (entityrefs) that are not part of ISO-8859-1
http://code.google.com/p/robotframework/source/detail?r=2620

Modified:
 /trunk/src/robot/parsing/htmlreader.py
 /trunk/utest/parsing/test_htmlreader.py

=======================================
--- /trunk/src/robot/parsing/htmlreader.py      Fri Mar 12 04:32:38 2010
+++ /trunk/src/robot/parsing/htmlreader.py      Fri Mar 12 04:32:45 2010
@@ -80,9 +80,12 @@
         if extra_entitydefs.has_key(name):
             return extra_entitydefs[name]
         try:
-            return entitydefs[name].decode('ISO-8859-1')
+            value = entitydefs[name]
         except KeyError:
             return '&'+name+';'
+        if value.startswith('&#'):
+            return unichr(int(value[2:-1]))
+        return value.decode('ISO-8859-1')

     def handle_charref(self, number):
         value = self._handle_charref(number)
=======================================
--- /trunk/utest/parsing/test_htmlreader.py     Fri Mar 12 04:32:38 2010
+++ /trunk/utest/parsing/test_htmlreader.py     Fri Mar 12 04:32:45 2010
@@ -131,6 +131,8 @@
                           ('Uuml', u'\u00DC'),
                           ('Aring', u'\u00C5'),
                           ('Ntilde', u'\u00D1'),
+                          ('nabla', u'\u2207'),
+                          ('ldquo', u'\u201c'),
                           ('invalid', '&invalid;') ]:
             self.reader.handle_entityref(inp)
             msg = '%s: %r != %r' % (inp,  self.response, exp)

Reply via email to