3 new revisions:

Revision: fc1716597301
Branch:   default
Author:   Robot Framework Developers (robotframew...@gmail.com)
Date:     Tue Sep 17 13:21:20 2013 UTC
Log: utils.escaping: refactored code and cleaned up tests to ease further d...
http://code.google.com/p/robotframework/source/detail?r=fc1716597301

Revision: 01ccda1fddcf
Branch:   default
Author:   Robot Framework Developers (robotframew...@gmail.com)
Date:     Tue Sep 17 14:44:32 2013 UTC
Log: Support for \xXX, \uXXXX and \UXXXXXXXX escapes in the test data....
http://code.google.com/p/robotframework/source/detail?r=01ccda1fddcf

Revision: 3afb2a6388a9
Branch:   default
Author:   Robot Framework Developers (robotframew...@gmail.com)
Date:     Tue Sep 17 14:44:38 2013 UTC
Log:      Automated merge with https://robotframework.googlecode.com/hg/
http://code.google.com/p/robotframework/source/detail?r=3afb2a6388a9

==============================================================================
Revision: fc1716597301
Branch:   default
Author:   Robot Framework Developers (robotframew...@gmail.com)
Date:     Tue Sep 17 13:21:20 2013 UTC
Log: utils.escaping: refactored code and cleaned up tests to ease further development.
http://code.google.com/p/robotframework/source/detail?r=fc1716597301

Modified:
 /src/robot/utils/escaping.py
 /utest/utils/test_escaping.py

=======================================
--- /src/robot/utils/escaping.py        Thu Jun  6 14:00:44 2013 UTC
+++ /src/robot/utils/escaping.py        Tue Sep 17 13:21:20 2013 UTC
@@ -15,7 +15,6 @@
 import re


-_ESCAPE_RE = re.compile(r'(\\+)([^\\]{0,2})')   # escapes and nextchars
 _SEQS_TO_BE_ESCAPED = ('\\', '${', '@{', '%{', '&{', '*{', '=')


@@ -29,38 +28,56 @@


 def unescape(item):
-    if not isinstance(item, basestring):
+    if not (isinstance(item, basestring) and '\\' in item):
         return item
-    result = []
-    unprocessed = item
-    while True:
-        res = _ESCAPE_RE.search(unprocessed)
-        # If no escapes found append string to result and exit loop
-        if res is None:
-            result.append(unprocessed)
-            break
-        # Split string to pre match, escapes, nextchars and unprocessed parts
-        # (e.g. '<pre><esc><nc><unproc>') where nextchars contains 0-2 chars
-        # and unprocessed may contain more escapes. Pre match part contains
-        # no escapes can is appended directly to result.
-        result.append(unprocessed[:res.start()])
-        escapes = res.group(1)
-        nextchars = res.group(2)
-        unprocessed = unprocessed[res.end():]
-        # Append every second escape char to result
-        result.append('\\' * (len(escapes) / 2))
-        # Handle '\n', '\r' and '\t'. Note that both '\n' and '\n ' are
-        # converted to '\n'
-        if len(escapes) % 2 == 0 or len(nextchars) == 0 \
-                    or nextchars[0] not in ['n','r','t']:
-            result.append(nextchars)
-        elif nextchars[0] == 'n':
-            if len(nextchars) == 1 or nextchars[1] == ' ':
-                result.append('\n')
+    return Unescaper().unescape(item)
+
+
+class Unescaper(object):
+    _escaped = re.compile(r'(\\+)([^\\]*)')
+
+    def unescape(self, string):
+        return ''.join(self._yield_unescaped(string))
+
+    def _yield_unescaped(self, string):
+        while '\\' in string:
+            finder = EscapeFinder(string)
+            yield finder.before + finder.backslashes
+            if finder.escaped and finder.text:
+                yield self._unescape(finder.text)
             else:
-                result.append('\n' + nextchars[1])
-        elif nextchars[0] == 'r':
-            result.append('\r' + nextchars[1:])
+                yield finder.text
+            string = finder.after
+        yield string
+
+    def _unescape(self, text):
+        try:
+            unescaper = getattr(self, '_unescaper_for_' + text[0])
+        except AttributeError:
+            return text
         else:
-            result.append('\t' + nextchars[1:])
-    return ''.join(result)
+            return unescaper(text[1:])
+
+    def _unescaper_for_n(self, text):
+        if text.startswith(' '):
+            text = text[1:]
+        return '\n' + text
+
+    def _unescaper_for_r(self, text):
+        return '\r' + text
+
+    def _unescaper_for_t(self, text):
+        return '\t' + text
+
+
+class EscapeFinder(object):
+    _escaped = re.compile(r'(\\+)([^\\]*)')
+
+    def __init__(self, string):
+        res = self._escaped.search(string)
+        self.before = string[:res.start()]
+        escape_chars = len(res.group(1))
+        self.backslashes = '\\' * (escape_chars // 2)
+        self.escaped = bool(escape_chars % 2)
+        self.text = res.group(2)
+        self.after = string[res.end():]
=======================================
--- /utest/utils/test_escaping.py       Fri Sep 28 09:54:38 2012 UTC
+++ /utest/utils/test_escaping.py       Tue Sep 17 13:21:20 2013 UTC
@@ -5,97 +5,102 @@
 from robot.utils.escaping import escape, unescape


+def assert_unescape(inp, exp):
+    assert_equals(unescape(inp), exp, repr(inp))
+
+
 class TestUnEscape(unittest.TestCase):

     def test_no_unescape(self):
-        for inp in [ 'no escapes', '' ]:
-            assert_equals(unescape(inp), inp)
+        for inp in ['no escapes', '']:
+            assert_unescape(inp, inp)

     def test_single_backslash(self):
-        for inp, exp in [ ('\\', ''),
-                          ('\\ ', ' '),
-                          ('a\\', 'a'),
-                          ('\\a', 'a'),
-                          ('a\\b\\c\\d', 'abcd') ]:
-            assert_equals(unescape(inp), exp, inp)
+        for inp, exp in [('\\', ''),
+                         ('\\ ', ' '),
+                         ('\\ ', ' '),
+                         ('a\\', 'a'),
+                         ('\\a', 'a'),
+                         ('a\\b\\c\\d', 'abcd')]:
+            assert_unescape(inp, exp)

     def test_multiple_backslash(self):
-        for inp, exp in [ ('\\\\', '\\'),
-                          ('\\\\\\', '\\'),
-                          ('\\\\\\\\', '\\\\'),
-                          ('x\\\\x', 'x\\x'),
-                          ('x\\\\\\x', 'x\\x'),
-                          ('x\\\\\\\\x', 'x\\\\x') ]:
-            assert_equals(unescape(inp), exp, inp)
+        for inp, exp in [('\\\\', '\\'),
+                         ('\\\\\\', '\\'),
+                         ('\\\\\\\\', '\\\\'),
+                         ('\\\\\\\\\\', '\\\\'),
+                         ('x\\\\x', 'x\\x'),
+                         ('x\\\\\\x', 'x\\x'),
+                         ('x\\\\\\\\x', 'x\\\\x')]:
+            assert_unescape(inp, exp)

-    def test_lf(self):
-        for inp, exp in [ ('\\n', '\n'),
-                          ('\\\\n', '\\n'),
-                          ('\\\\\\n', '\\\n'),
-                          ('\\n ', '\n'),
-                          ('\\\\n ', '\\n '),
-                          ('\\\\\\n ', '\\\n'),
-                          ('\\nx', '\nx'),
-                          ('\\\\nx', '\\nx'),
-                          ('\\\\\\nx', '\\\nx'),
-                          ('\\n x', '\nx'),
-                          ('\\\\n x', '\\n x'),
-                          ('\\\\\\n x', '\\\nx') ]:
-            assert_equals(unescape(inp), exp, "'%s'" % inp)
+    def test_newline(self):
+        for inp, exp in [('\\n', '\n'),
+                         ('\\\\n', '\\n'),
+                         ('\\\\\\n', '\\\n'),
+                         ('\\n ', '\n'),
+                         ('\\\\n ', '\\n '),
+                         ('\\\\\\n ', '\\\n'),
+                         ('\\nx', '\nx'),
+                         ('\\\\nx', '\\nx'),
+                         ('\\\\\\nx', '\\\nx'),
+                         ('\\n x', '\nx'),
+                         ('\\\\n x', '\\n x'),
+                         ('\\\\\\n x', '\\\nx')]:
+            assert_unescape(inp, exp)

-    def test_cr(self):
-        for inp, exp in [ ('\\r', '\r'),
-                          ('\\\\r', '\\r'),
-                          ('\\\\\\r', '\\\r'),
-                          ('\\r ', '\r '),
-                          ('\\\\r ', '\\r '),
-                          ('\\\\\\r ', '\\\r '),
-                          ('\\rx', '\rx'),
-                          ('\\\\rx', '\\rx'),
-                          ('\\\\\\rx', '\\\rx'),
-                          ('\\r x', '\r x'),
-                          ('\\\\r x', '\\r x'),
-                          ('\\\\\\r x', '\\\r x') ]:
-            assert_equals(unescape(inp), exp, inp)
+    def test_carriage_return(self):
+        for inp, exp in [('\\r', '\r'),
+                         ('\\\\r', '\\r'),
+                         ('\\\\\\r', '\\\r'),
+                         ('\\r ', '\r '),
+                         ('\\\\r ', '\\r '),
+                         ('\\\\\\r ', '\\\r '),
+                         ('\\rx', '\rx'),
+                         ('\\\\rx', '\\rx'),
+                         ('\\\\\\rx', '\\\rx'),
+                         ('\\r x', '\r x'),
+                         ('\\\\r x', '\\r x'),
+                         ('\\\\\\r x', '\\\r x')]:
+            assert_unescape(inp, exp)

     def test_tab(self):
-        for inp, exp in [ ('\\t', '\t'),
-                          ('\\\\t', '\\t'),
-                          ('\\\\\\t', '\\\t'),
-                          ('\\t ', '\t '),
-                          ('\\\\t ', '\\t '),
-                          ('\\\\\\t ', '\\\t '),
-                          ('\\tx', '\tx'),
-                          ('\\\\tx', '\\tx'),
-                          ('\\\\\\tx', '\\\tx'),
-                          ('\\t x', '\t x'),
-                          ('\\\\t x', '\\t x'),
-                          ('\\\\\\t x', '\\\t x') ]:
-            assert_equals(unescape(inp), exp, inp)
+        for inp, exp in [('\\t', '\t'),
+                         ('\\\\t', '\\t'),
+                         ('\\\\\\t', '\\\t'),
+                         ('\\t ', '\t '),
+                         ('\\\\t ', '\\t '),
+                         ('\\\\\\t ', '\\\t '),
+                         ('\\tx', '\tx'),
+                         ('\\\\tx', '\\tx'),
+                         ('\\\\\\tx', '\\\tx'),
+                         ('\\t x', '\t x'),
+                         ('\\\\t x', '\\t x'),
+                         ('\\\\\\t x', '\\\t x')]:
+            assert_unescape(inp, exp)


 class TestEscape(unittest.TestCase):

     def test_escape(self):
-        for inp, exp in [ ('nothing to escape', 'nothing to escape'),
-                          ('still nothing $ @', 'still nothing $ @' ),
-                          ('1 backslash to 2: \\', '1 backslash to 2: \\\\'),
-                          ('3 bs to 6: \\\\\\', '3 bs to 6: \\\\\\\\\\\\'),
-                          ('\\' * 1000, '\\' * 2000 ),
-                          ('${notvar}', '\\${notvar}'),
-                          ('@{notvar}', '\\@{notvar}'),
-                          ('${nv} ${nv} @{nv}', '\\${nv} \\${nv} \\@{nv}'),
-                          ('\\${already esc}', '\\\\\\${already esc}'),
-                          ('\\${ae} \\\\@{ae} \\\\\\@{ae}',
-                           '\\\\\\${ae} \\\\\\\\\\@{ae} \\\\\\\\\\\\\\@{ae}'),
-                          ('%{reserved}', '\\%{reserved}'),
-                          ('&{reserved}', '\\&{reserved}'),
-                          ('*{reserved}', '\\*{reserved}'),
-                          ('x{notreserved}', 'x{notreserved}'),
-                          ]:
+        for inp, exp in [('nothing to escape', 'nothing to escape'),
+                         ('still nothing $ @', 'still nothing $ @' ),
+                         ('1 backslash to 2: \\', '1 backslash to 2: \\\\'),
+                         ('3 bs to 6: \\\\\\', '3 bs to 6: \\\\\\\\\\\\'),
+                         ('\\' * 1000, '\\' * 2000 ),
+                         ('${notvar}', '\\${notvar}'),
+                         ('@{notvar}', '\\@{notvar}'),
+                         ('${nv} ${nv} @{nv}', '\\${nv} \\${nv} \\@{nv}'),
+                         ('\\${already esc}', '\\\\\\${already esc}'),
+                         ('\\${ae} \\\\@{ae} \\\\\\@{ae}',
+                          '\\\\\\${ae} \\\\\\\\\\@{ae} \\\\\\\\\\\\\\@{ae}'),
+                         ('%{reserved}', '\\%{reserved}'),
+                         ('&{reserved}', '\\&{reserved}'),
+                         ('*{reserved}', '\\*{reserved}'),
+                         ('x{notreserved}', 'x{notreserved}'),
+                         ('named=arg', 'named\\=arg')]:
             assert_equals(escape(inp), exp, inp)


 if __name__ == '__main__':
     unittest.main()
-

==============================================================================
Revision: 01ccda1fddcf
Branch:   default
Author:   Robot Framework Developers (robotframew...@gmail.com)
Date:     Tue Sep 17 14:44:32 2013 UTC
Log:      Support for \xXX, \uXXXX and \UXXXXXXXX escapes in the test data.

Update issue 1524
Status: Started
Implementation and unit tests for \x, \u and \U.

Todo:
- Decide is \N actually needed.
- Acceptance tests.
- Documentation in User Guide.
http://code.google.com/p/robotframework/source/detail?r=01ccda1fddcf

Modified:
 /src/robot/utils/escaping.py
 /utest/utils/test_escaping.py

=======================================
--- /src/robot/utils/escaping.py        Tue Sep 17 13:21:20 2013 UTC
+++ /src/robot/utils/escaping.py        Tue Sep 17 14:44:32 2013 UTC
@@ -69,6 +69,28 @@
     def _unescaper_for_t(self, text):
         return '\t' + text

+    def _unescaper_for_x(self, text):
+        return self._unescape_character(text, 2, 'x')
+
+    def _unescaper_for_u(self, text):
+        return self._unescape_character(text, 4, 'u')
+
+    def _unescaper_for_U(self, text):
+        return self._unescape_character(text, 8, 'U')
+
+    def _unescape_character(self, text, length, escape):
+        try:
+            ordinal = self._get_ordinal(text, length)
+        except ValueError:
+            return escape + text
+        else:
+            return unichr(ordinal) + text[length:]
+
+    def _get_ordinal(self, text, length):
+        if len(text) < length:
+            raise ValueError
+        return int(text[:length], 16)
+

 class EscapeFinder(object):
     _escaped = re.compile(r'(\\+)([^\\]*)')
=======================================
--- /utest/utils/test_escaping.py       Tue Sep 17 13:21:20 2013 UTC
+++ /utest/utils/test_escaping.py       Tue Sep 17 14:44:32 2013 UTC
@@ -79,6 +79,37 @@
                          ('\\\\\\t x', '\\\t x')]:
             assert_unescape(inp, exp)

+    def test_invalid_x(self):
+        for inp in r'\x \xxx xx\xxx \x0 \x0g \X00'.split():
+            assert_unescape(inp, inp.replace('\\', ''))
+
+    def test_valid_x(self):
+        for inp, exp in [(r'\x00', u'\x00'),
+                         (r'\xab\xBA', u'\xab\xba'),
+                         (r'\xe4iti', u'\xe4iti')]:
+            assert_unescape(inp, exp)
+
+    def test_invalid_u(self):
+        for inp in r'\u \ukekkonen b\uu \u0 \u123 \u123x'.split():
+            assert_unescape(inp, inp.replace('\\', ''))
+
+    def test_valid_u(self):
+        for inp, exp in [(r'\u0000', u'\x00'),
+                         (r'\uABba', u'\uabba'),
+                         (r'\u00e4iti', u'\xe4iti')]:
+            assert_unescape(inp, exp)
+
+    def test_invalid_U(self):
+        for inp in r'\U \Ukekkonen b\Uu \U0 \U1234567 \U1234567x'.split():
+            assert_unescape(inp, inp.replace('\\', ''))
+
+    def test_valid_U(self):
+        for inp, exp in [(r'\U00000000', u'\x00'),
+                         (r'\U0000ABba', u'\uabba'),
+                         (r'\U00010905', u'\U00010905'),
+                         (r'\U000000e4iti', u'\xe4iti')]:
+            assert_unescape(inp, exp)
+

 class TestEscape(unittest.TestCase):


==============================================================================
Revision: 3afb2a6388a9
Branch:   default
Author:   Robot Framework Developers (robotframew...@gmail.com)
Date:     Tue Sep 17 14:44:38 2013 UTC
Log:      Automated merge with https://robotframework.googlecode.com/hg/
http://code.google.com/p/robotframework/source/detail?r=3afb2a6388a9


--

--- You received this message because you are subscribed to the Google Groups "robotframework-commit" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to robotframework-commit+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to