2 new revisions:
Revision: 48e0be57dd85
Branch: default
Author: Pekka Klärck
Date: Mon Nov 18 21:39:35 2013 UTC
Log: Introduced generic Utf8Reader to ease working around IronPython BOM
UTF-...
http://code.google.com/p/robotframework/source/detail?r=48e0be57dd85
Revision: c4cbf3d7a5a2
Branch: default
Author: Pekka Klärck
Date: Mon Nov 18 23:59:39 2013 UTC
Log: Utf8Reader: enforce files to be open in binary to prevent
problems wit...
http://code.google.com/p/robotframework/source/detail?r=c4cbf3d7a5a2
==============================================================================
Revision: 48e0be57dd85
Branch: default
Author: Pekka Klärck
Date: Mon Nov 18 21:39:35 2013 UTC
Log: Introduced generic Utf8Reader to ease working around IronPython BOM
UTF-8 bug.
Update issue 1581
Status: Started
Owner: pekka.klarck
Added generic Utf8Reader utility. It doesn't yet handle the IPY bug.
http://code.google.com/p/robotframework/source/detail?r=48e0be57dd85
Added:
/src/robot/utils/utf8reader.py
Modified:
/src/robot/parsing/tsvreader.py
/src/robot/parsing/txtreader.py
/src/robot/utils/__init__.py
/src/robot/utils/argumentparser.py
=======================================
--- /dev/null
+++ /src/robot/utils/utf8reader.py Mon Nov 18 21:39:35 2013 UTC
@@ -0,0 +1,45 @@
+# Copyright 2008-2013 Nokia Siemens Networks Oyj
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from codecs import BOM_UTF8
+
+
+class Utf8Reader(object):
+
+ def __init__(self, path_or_file):
+ if isinstance(path_or_file, basestring):
+ self._file = open(path_or_file)
+ self._close = True
+ else:
+ self._file = path_or_file
+ self._close = False
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *exc_info):
+ if self._close:
+ self._file.close()
+
+ def read(self):
+ return self._decode(self._file.read())
+
+ def _decode(self, content, remove_bom=True):
+ if remove_bom and content.startswith(BOM_UTF8):
+ content = content[len(BOM_UTF8):]
+ return content.decode('UTF-8')
+
+ def readlines(self):
+ for index, line in enumerate(self._file.readlines()):
+ yield self._decode(line, remove_bom=index == 0)
=======================================
--- /src/robot/parsing/tsvreader.py Thu Jun 6 14:00:44 2013 UTC
+++ /src/robot/parsing/tsvreader.py Mon Nov 18 21:39:35 2013 UTC
@@ -12,31 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from codecs import BOM_UTF8
+from robot.utils import Utf8Reader
NBSP = u'\xA0'
-class TsvReader:
+class TsvReader(object):
def read(self, tsvfile, populator):
process = False
- for index, row in enumerate(tsvfile.readlines()):
- row = self._decode_row(row, index == 0)
- cells = [self._process(cell) for cell in self.split_row(row)]
- name = cells and cells[0].strip() or ''
- if name.startswith('*') and \
- populator.start_table([c.replace('*','') for c in
cells]):
+ for row in Utf8Reader(tsvfile).readlines():
+ row = self._process_row(row)
+ cells = [self._process_cell(cell) for cell in
self.split_row(row)]
+ if cells and cells[0].strip().startswith('*') and \
+ populator.start_table([c.replace('*', '') for c in
cells]):
process = True
elif process:
populator.add(cells)
populator.eof()
- def _decode_row(self, row, is_first):
- if is_first and row.startswith(BOM_UTF8):
- row = row[len(BOM_UTF8):]
- row = row.decode('UTF-8')
+ def _process_row(self, row):
if NBSP in row:
row = row.replace(NBSP, ' ')
return row.rstrip()
@@ -45,7 +41,7 @@
def split_row(cls, row):
return row.split('\t')
- def _process(self, cell):
+ def _process_cell(self, cell):
if len(cell) > 1 and cell[0] == cell[-1] == '"':
- cell = cell[1:-1].replace('""','"')
+ cell = cell[1:-1].replace('""', '"')
return cell
=======================================
--- /src/robot/parsing/txtreader.py Thu Jun 6 14:00:44 2013 UTC
+++ /src/robot/parsing/txtreader.py Mon Nov 18 21:39:35 2013 UTC
@@ -30,5 +30,5 @@
row = row[1:-1] if row.endswith(' |') else row[1:]
return [cell.strip() for cell in cls._pipe_splitter.split(row)]
- def _process(self, cell):
+ def _process_cell(self, cell):
return cell
=======================================
--- /src/robot/utils/__init__.py Thu Jun 13 11:23:41 2013 UTC
+++ /src/robot/utils/__init__.py Mon Nov 18 21:39:35 2013 UTC
@@ -58,6 +58,7 @@
from .text import (cut_long_message, format_assign_message,
pad_console_length, get_console_length)
from .unic import unic, safe_repr
+from .utf8reader import Utf8Reader
import sys
is_jython = sys.platform.startswith('java')
=======================================
--- /src/robot/utils/argumentparser.py Wed Nov 6 12:38:04 2013 UTC
+++ /src/robot/utils/argumentparser.py Mon Nov 18 21:39:35 2013 UTC
@@ -19,7 +19,6 @@
import sys
import glob
import string
-import codecs
import textwrap
from robot.errors import DataError, Information, FrameworkError
@@ -27,6 +26,7 @@
from .misc import plural_or_not
from .encoding import decode_output, decode_from_system
+from .utf8reader import Utf8Reader
ESCAPES = dict(
@@ -398,14 +398,11 @@
def _read_from_file(self, path):
try:
- with open(path) as f:
- content = f.read().decode('UTF-8')
+ with Utf8Reader(path) as reader:
+ return reader.read()
except (IOError, UnicodeError), err:
raise DataError("Opening argument file '%s' failed: %s"
% (path, err))
- if content.startswith(codecs.BOM_UTF8.decode('UTF-8')):
- content = content[1:]
- return content
def _read_from_stdin(self):
content = sys.__stdin__.read()
==============================================================================
Revision: c4cbf3d7a5a2
Branch: default
Author: Pekka Klärck
Date: Mon Nov 18 23:59:39 2013 UTC
Log: Utf8Reader: enforce files to be open in binary to prevent
problems with IronPython
Update issue 1580
Status: Done
In the end making sure that files are opened in binary mode was enough to
fix the problem. Could have removed the earlier added Utf8Reader, but
decided to keep it because it handles decoding and removing the BOM anyway.
http://code.google.com/p/robotframework/source/detail?r=c4cbf3d7a5a2
Added:
/utest/utils/test_utf8reader.py
Modified:
/atest/robot/parsing/ignore_bom.txt
/src/robot/parsing/populators.py
/src/robot/utils/utf8reader.py
=======================================
--- /dev/null
+++ /utest/utils/test_utf8reader.py Mon Nov 18 23:59:39 2013 UTC
@@ -0,0 +1,66 @@
+from __future__ import with_statement
+from codecs import BOM_UTF8
+from StringIO import StringIO
+import os
+import tempfile
+import unittest
+
+from robot.utils import Utf8Reader
+from robot.utils.asserts import assert_equals, assert_raises
+
+
+PATH = os.path.join(tempfile.gettempdir(), 'test_utf8reader.xml')
+STRING =
u'Hyv\xe4\xe4\nty\xf6t\xe4\n.C\u043f\u0430\u0441\u0438\u0431\u043e'
+
+
+class TestUtf8ReaderWithBom(unittest.TestCase):
+ BOM = BOM_UTF8
+
+ def setUp(self):
+ self._create()
+
+ def _create(self, content=STRING, encoding='UTF-8'):
+ with open(PATH, 'wb') as f:
+ f.write(self.BOM + content.encode(encoding))
+
+ def tearDown(self):
+ os.remove(PATH)
+
+ def test_read(self):
+ with Utf8Reader(PATH) as reader:
+ f = reader._file
+ assert_equals(reader.read(), STRING)
+ assert_equals(f.closed, True)
+
+ def test_read_open_file(self):
+ with open(PATH, 'rb') as f:
+ with Utf8Reader(f) as reader:
+ assert_equals(reader.read(), STRING)
+ assert_equals(f.closed, False)
+
+ def test_must_open_in_binary_mode(self):
+ with open(PATH, 'r') as f:
+ assert_raises(ValueError, Utf8Reader, f)
+
+ def test_stringio_is_ok(self):
+ f = StringIO(self.BOM + STRING.encode('UTF-8'))
+ with Utf8Reader(f) as reader:
+ assert_equals(reader.read(), STRING)
+ assert_equals(f.closed, False)
+
+ def test_readlines(self):
+ with Utf8Reader(PATH) as reader:
+ assert_equals(list(reader.readlines()),
STRING.splitlines(True))
+
+ def test_invalid_encoding(self):
+ self._create(STRING.splitlines()[-1], 'ISO-8859-5')
+ with Utf8Reader(PATH) as reader:
+ assert_raises(UnicodeDecodeError, reader.read)
+
+
+class TestUtf8ReaderWithoutBom(TestUtf8ReaderWithBom):
+ BOM = ''
+
+
+if __name__ == '__main__':
+ unittest.main()
=======================================
--- /atest/robot/parsing/ignore_bom.txt Mon Apr 12 14:52:47 2010 UTC
+++ /atest/robot/parsing/ignore_bom.txt Mon Nov 18 23:59:39 2013 UTC
@@ -4,12 +4,19 @@
Force Tags regression pybot jybot
Resource atest_resource.txt
-
*** Test Cases ***
-
Byte order mark in plain text file
- Check test case ${TESTNAME}
+ [Setup] File Should Have Bom parsing/bom.txt
+ ${tc} = Check test case ${TESTNAME}
+ Check log message ${tc.kws[0].msgs[0]} Hyvää päivää €åppa!
+Byte order mark in TSV file
+ [Setup] File Should Have Bom parsing/bom.txt
+ ${tc} = Check test case ${TESTNAME}
+ Check log message ${tc.kws[0].msgs[0]} Hyvää päivää €åppa!
-Byte order mark in TSV file
- Check test case ${TESTNAME}
+*** Keywords ***
+File Should Have Bom
+ [Arguments] ${path}
+ ${content} = Get File ${DATADIR}/${path}
+ Should Start With ${content} \ufeff No BOM!!
=======================================
--- /src/robot/parsing/populators.py Thu Jun 6 14:00:44 2013 UTC
+++ /src/robot/parsing/populators.py Mon Nov 18 23:59:39 2013 UTC
@@ -65,6 +65,8 @@
if not os.path.isfile(path):
raise DataError("Data source does not exist.")
try:
+ # IronPython handles BOM incorrectly if not using binary mode:
+ # http://code.google.com/p/robotframework/issues/detail?id=1580
return open(path, 'rb')
except:
raise DataError(get_error_message())
=======================================
--- /src/robot/utils/utf8reader.py Mon Nov 18 21:39:35 2013 UTC
+++ /src/robot/utils/utf8reader.py Mon Nov 18 23:59:39 2013 UTC
@@ -19,11 +19,15 @@
def __init__(self, path_or_file):
if isinstance(path_or_file, basestring):
- self._file = open(path_or_file)
+ self._file = open(path_or_file, 'rb')
self._close = True
else:
self._file = path_or_file
self._close = False
+ # IronPython handles BOM incorrectly if file not opened in binary
mode:
+ # http://code.google.com/p/robotframework/issues/detail?id=1580
+ if hasattr(self._file, 'mode') and self._file.mode != 'rb':
+ raise ValueError('Only files in binary mode accepted.')
def __enter__(self):
return self
@@ -35,11 +39,11 @@
def read(self):
return self._decode(self._file.read())
+ def readlines(self):
+ for index, line in enumerate(self._file.readlines()):
+ yield self._decode(line, remove_bom=index == 0)
+
def _decode(self, content, remove_bom=True):
if remove_bom and content.startswith(BOM_UTF8):
content = content[len(BOM_UTF8):]
return content.decode('UTF-8')
-
- def readlines(self):
- for index, line in enumerate(self._file.readlines()):
- yield self._decode(line, remove_bom=index == 0)
--
---
You received this message because you are subscribed to the Google Groups "robotframework-commit" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.