2 new revisions:

Revision: 48e0be57dd85
Branch:   default
Author:   Pekka Klärck
Date:     Mon Nov 18 21:39:35 2013 UTC
Log: Introduced generic Utf8Reader to ease working around IronPython BOM UTF-...
http://code.google.com/p/robotframework/source/detail?r=48e0be57dd85

Revision: c4cbf3d7a5a2
Branch:   default
Author:   Pekka Klärck
Date:     Mon Nov 18 23:59:39 2013 UTC
Log: Utf8Reader: enforce files to be open in binary to prevent problems wit...
http://code.google.com/p/robotframework/source/detail?r=c4cbf3d7a5a2

==============================================================================
Revision: 48e0be57dd85
Branch:   default
Author:   Pekka Klärck
Date:     Mon Nov 18 21:39:35 2013 UTC
Log: Introduced generic Utf8Reader to ease working around IronPython BOM UTF-8 bug.

Update issue 1581
Status: Started
Owner: pekka.klarck
Added generic Utf8Reader utility. It doesn't yet handle the IPY bug.
http://code.google.com/p/robotframework/source/detail?r=48e0be57dd85

Added:
 /src/robot/utils/utf8reader.py
Modified:
 /src/robot/parsing/tsvreader.py
 /src/robot/parsing/txtreader.py
 /src/robot/utils/__init__.py
 /src/robot/utils/argumentparser.py

=======================================
--- /dev/null
+++ /src/robot/utils/utf8reader.py      Mon Nov 18 21:39:35 2013 UTC
@@ -0,0 +1,45 @@
+#  Copyright 2008-2013 Nokia Siemens Networks Oyj
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from codecs import BOM_UTF8
+
+
+class Utf8Reader(object):
+
+    def __init__(self, path_or_file):
+        if isinstance(path_or_file, basestring):
+            self._file = open(path_or_file)
+            self._close = True
+        else:
+            self._file = path_or_file
+            self._close = False
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_info):
+        if self._close:
+            self._file.close()
+
+    def read(self):
+        return self._decode(self._file.read())
+
+    def _decode(self, content, remove_bom=True):
+        if remove_bom and content.startswith(BOM_UTF8):
+            content = content[len(BOM_UTF8):]
+        return content.decode('UTF-8')
+
+    def readlines(self):
+        for index, line in enumerate(self._file.readlines()):
+            yield self._decode(line, remove_bom=index == 0)
=======================================
--- /src/robot/parsing/tsvreader.py     Thu Jun  6 14:00:44 2013 UTC
+++ /src/robot/parsing/tsvreader.py     Mon Nov 18 21:39:35 2013 UTC
@@ -12,31 +12,27 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

-from codecs import BOM_UTF8
+from robot.utils import Utf8Reader


 NBSP = u'\xA0'


-class TsvReader:
+class TsvReader(object):

     def read(self, tsvfile, populator):
         process = False
-        for index, row in enumerate(tsvfile.readlines()):
-            row = self._decode_row(row, index == 0)
-            cells = [self._process(cell) for cell in self.split_row(row)]
-            name = cells and cells[0].strip() or ''
-            if name.startswith('*') and \
- populator.start_table([c.replace('*','') for c in cells]):
+        for row in Utf8Reader(tsvfile).readlines():
+            row = self._process_row(row)
+ cells = [self._process_cell(cell) for cell in self.split_row(row)]
+            if cells and cells[0].strip().startswith('*') and \
+ populator.start_table([c.replace('*', '') for c in cells]):
                 process = True
             elif process:
                 populator.add(cells)
         populator.eof()

-    def _decode_row(self, row, is_first):
-        if is_first and row.startswith(BOM_UTF8):
-            row = row[len(BOM_UTF8):]
-        row = row.decode('UTF-8')
+    def _process_row(self, row):
         if NBSP in row:
             row = row.replace(NBSP, ' ')
         return row.rstrip()
@@ -45,7 +41,7 @@
     def split_row(cls, row):
         return row.split('\t')

-    def _process(self, cell):
+    def _process_cell(self, cell):
         if len(cell) > 1 and cell[0] == cell[-1] == '"':
-            cell = cell[1:-1].replace('""','"')
+            cell = cell[1:-1].replace('""', '"')
         return cell
=======================================
--- /src/robot/parsing/txtreader.py     Thu Jun  6 14:00:44 2013 UTC
+++ /src/robot/parsing/txtreader.py     Mon Nov 18 21:39:35 2013 UTC
@@ -30,5 +30,5 @@
         row = row[1:-1] if row.endswith(' |') else row[1:]
         return [cell.strip() for cell in cls._pipe_splitter.split(row)]

-    def _process(self, cell):
+    def _process_cell(self, cell):
         return cell
=======================================
--- /src/robot/utils/__init__.py        Thu Jun 13 11:23:41 2013 UTC
+++ /src/robot/utils/__init__.py        Mon Nov 18 21:39:35 2013 UTC
@@ -58,6 +58,7 @@
 from .text import (cut_long_message, format_assign_message,
                    pad_console_length, get_console_length)
 from .unic import unic, safe_repr
+from .utf8reader import Utf8Reader

 import sys
 is_jython = sys.platform.startswith('java')
=======================================
--- /src/robot/utils/argumentparser.py  Wed Nov  6 12:38:04 2013 UTC
+++ /src/robot/utils/argumentparser.py  Mon Nov 18 21:39:35 2013 UTC
@@ -19,7 +19,6 @@
 import sys
 import glob
 import string
-import codecs
 import textwrap

 from robot.errors import DataError, Information, FrameworkError
@@ -27,6 +26,7 @@

 from .misc import plural_or_not
 from .encoding import decode_output, decode_from_system
+from .utf8reader import Utf8Reader


 ESCAPES = dict(
@@ -398,14 +398,11 @@

     def _read_from_file(self, path):
         try:
-            with open(path) as f:
-                content = f.read().decode('UTF-8')
+            with Utf8Reader(path) as reader:
+                return reader.read()
         except (IOError, UnicodeError), err:
             raise DataError("Opening argument file '%s' failed: %s"
                             % (path, err))
-        if content.startswith(codecs.BOM_UTF8.decode('UTF-8')):
-            content = content[1:]
-        return content

     def _read_from_stdin(self):
         content = sys.__stdin__.read()

==============================================================================
Revision: c4cbf3d7a5a2
Branch:   default
Author:   Pekka Klärck
Date:     Mon Nov 18 23:59:39 2013 UTC
Log: Utf8Reader: enforce files to be open in binary to prevent problems with IronPython

Update issue 1580
Status: Done
In the end, making sure that files are opened in binary mode was enough to fix the problem. Could have removed the earlier added Utf8Reader, but decided to keep it because it handles decoding and removing the BOM anyway.
http://code.google.com/p/robotframework/source/detail?r=c4cbf3d7a5a2

Added:
 /utest/utils/test_utf8reader.py
Modified:
 /atest/robot/parsing/ignore_bom.txt
 /src/robot/parsing/populators.py
 /src/robot/utils/utf8reader.py

=======================================
--- /dev/null
+++ /utest/utils/test_utf8reader.py     Mon Nov 18 23:59:39 2013 UTC
@@ -0,0 +1,66 @@
+from __future__ import with_statement
+from codecs import BOM_UTF8
+from StringIO import StringIO
+import os
+import tempfile
+import unittest
+
+from robot.utils import Utf8Reader
+from robot.utils.asserts import assert_equals, assert_raises
+
+
+PATH = os.path.join(tempfile.gettempdir(), 'test_utf8reader.xml')
+STRING = u'Hyv\xe4\xe4\nty\xf6t\xe4\n.C\u043f\u0430\u0441\u0438\u0431\u043e'
+
+
+class TestUtf8ReaderWithBom(unittest.TestCase):
+    BOM = BOM_UTF8
+
+    def setUp(self):
+        self._create()
+
+    def _create(self, content=STRING, encoding='UTF-8'):
+        with open(PATH, 'wb') as f:
+            f.write(self.BOM + content.encode(encoding))
+
+    def tearDown(self):
+        os.remove(PATH)
+
+    def test_read(self):
+        with Utf8Reader(PATH) as reader:
+            f = reader._file
+            assert_equals(reader.read(), STRING)
+        assert_equals(f.closed, True)
+
+    def test_read_open_file(self):
+        with open(PATH, 'rb') as f:
+            with Utf8Reader(f) as reader:
+                assert_equals(reader.read(), STRING)
+            assert_equals(f.closed, False)
+
+    def test_must_open_in_binary_mode(self):
+        with open(PATH, 'r') as f:
+            assert_raises(ValueError, Utf8Reader, f)
+
+    def test_stringio_is_ok(self):
+        f = StringIO(self.BOM + STRING.encode('UTF-8'))
+        with Utf8Reader(f) as reader:
+            assert_equals(reader.read(), STRING)
+        assert_equals(f.closed, False)
+
+    def test_readlines(self):
+        with Utf8Reader(PATH) as reader:
+ assert_equals(list(reader.readlines()), STRING.splitlines(True))
+
+    def test_invalid_encoding(self):
+        self._create(STRING.splitlines()[-1], 'ISO-8859-5')
+        with Utf8Reader(PATH) as reader:
+            assert_raises(UnicodeDecodeError, reader.read)
+
+
+class TestUtf8ReaderWithoutBom(TestUtf8ReaderWithBom):
+    BOM = ''
+
+
+if __name__ == '__main__':
+    unittest.main()
=======================================
--- /atest/robot/parsing/ignore_bom.txt Mon Apr 12 14:52:47 2010 UTC
+++ /atest/robot/parsing/ignore_bom.txt Mon Nov 18 23:59:39 2013 UTC
@@ -4,12 +4,19 @@
 Force Tags     regression   pybot  jybot
 Resource       atest_resource.txt

-
 *** Test Cases ***
-
 Byte order mark in plain text file
-    Check test case  ${TESTNAME}
+    [Setup]    File Should Have Bom    parsing/bom.txt
+    ${tc} =    Check test case  ${TESTNAME}
+    Check log message    ${tc.kws[0].msgs[0]}    Hyvää päivää €åppa!

+Byte order mark in TSV file
+    [Setup]    File Should Have Bom    parsing/bom.txt
+    ${tc} =    Check test case  ${TESTNAME}
+    Check log message    ${tc.kws[0].msgs[0]}    Hyvää päivää €åppa!

-Byte order mark in TSV file
-    Check test case  ${TESTNAME}
+*** Keywords ***
+File Should Have Bom
+    [Arguments]    ${path}
+    ${content} =    Get File    ${DATADIR}/${path}
+    Should Start With    ${content}    \ufeff    No BOM!!
=======================================
--- /src/robot/parsing/populators.py    Thu Jun  6 14:00:44 2013 UTC
+++ /src/robot/parsing/populators.py    Mon Nov 18 23:59:39 2013 UTC
@@ -65,6 +65,8 @@
         if not os.path.isfile(path):
             raise DataError("Data source does not exist.")
         try:
+            # IronPython handles BOM incorrectly if not using binary mode:
+            # http://code.google.com/p/robotframework/issues/detail?id=1580
             return open(path, 'rb')
         except:
             raise DataError(get_error_message())
=======================================
--- /src/robot/utils/utf8reader.py      Mon Nov 18 21:39:35 2013 UTC
+++ /src/robot/utils/utf8reader.py      Mon Nov 18 23:59:39 2013 UTC
@@ -19,11 +19,15 @@

     def __init__(self, path_or_file):
         if isinstance(path_or_file, basestring):
-            self._file = open(path_or_file)
+            self._file = open(path_or_file, 'rb')
             self._close = True
         else:
             self._file = path_or_file
             self._close = False
+ # IronPython handles BOM incorrectly if file not opened in binary mode:
+        # http://code.google.com/p/robotframework/issues/detail?id=1580
+        if hasattr(self._file, 'mode') and self._file.mode != 'rb':
+            raise ValueError('Only files in binary mode accepted.')

     def __enter__(self):
         return self
@@ -35,11 +39,11 @@
     def read(self):
         return self._decode(self._file.read())

+    def readlines(self):
+        for index, line in enumerate(self._file.readlines()):
+            yield self._decode(line, remove_bom=index == 0)
+
     def _decode(self, content, remove_bom=True):
         if remove_bom and content.startswith(BOM_UTF8):
             content = content[len(BOM_UTF8):]
         return content.decode('UTF-8')
-
-    def readlines(self):
-        for index, line in enumerate(self._file.readlines()):
-            yield self._decode(line, remove_bom=index == 0)

--

--- You received this message because you are subscribed to the Google Groups "robotframework-commit" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to