4 new revisions:
Revision: 273f498899d2
Branch: default
Author: Pekka Klärck
Date: Sun Oct 21 13:30:38 2012
Log: proto/profiler.py: added help text
http://code.google.com/p/robotframework/source/detail?r=273f498899d2
Revision: b9528ed87b97
Branch: default
Author: Pekka Klärck
Date: Tue Oct 23 13:01:36 2012
Log: parsing: Consider non-breaking space as normal space when parsing
TSV ...
http://code.google.com/p/robotframework/source/detail?r=b9528ed87b97
Revision: e267f4f982f7
Branch: default
Author: Pekka Klärck
Date: Tue Oct 23 13:19:14 2012
Log: parsing: Consider NBSP as normal space also when parsing HTML....
http://code.google.com/p/robotframework/source/detail?r=e267f4f982f7
Revision: 01eac700dbe5
Branch: default
Author: Pekka Klärck
Date: Tue Oct 23 14:20:22 2012
Log: htmlreader: Refactored the code and fixed handling &tilde;...
http://code.google.com/p/robotframework/source/detail?r=01eac700dbe5
==============================================================================
Revision: 273f498899d2
Branch: default
Author: Pekka Klärck
Date: Sun Oct 21 13:30:38 2012
Log: proto/profiler.py: added help text
http://code.google.com/p/robotframework/source/detail?r=273f498899d2
Modified:
/proto/profiler.py
=======================================
--- /proto/profiler.py Sat Oct 20 12:05:57 2012
+++ /proto/profiler.py Sun Oct 21 13:30:38 2012
@@ -1,3 +1,10 @@
+#!/usr/bin/env python
+
+"""Profiler for Robot Framework `run` and `rebot`.
+
+Usage: profiler.py run|rebot [options] arguments
+"""
+
import cProfile
import pstats
import os
@@ -11,14 +18,20 @@
from robot.run import run_cli
from robot.rebot import rebot_cli
-if sys.argv[1] != 'rebot':
- profiled = 'run_cli(sys.argv[1:])'
-else:
- profiled = 'rebot_cli(sys.argv[2:])'
-results = tempfile.mktemp(suffix='.out', prefix='pybot-profile',
+def profile(profiled):
+ results = tempfile.mktemp(suffix='.out', prefix='pybot-profile',
dir=join(rootdir, 'tmp'))
-cProfile.run(profiled, results)
-stats = pstats.Stats(results)
-stats.sort_stats('cumulative').print_stats(50)
-os.remove(results)
+ cProfile.run(profiled, results)
+ stats = pstats.Stats(results)
+ stats.sort_stats('cumulative').print_stats(50)
+ os.remove(results)
+
+
+if __name__ == '__main__':
+ try:
+ profiled = {'run': 'run_cli(sys.argv[2:])',
+ 'rebot': 'rebot_cli(sys.argv[2:])'}[sys.argv[1]]
+ except (IndexError, KeyError):
+ sys.exit(__doc__)
+ profile(profiled)
==============================================================================
Revision: b9528ed87b97
Branch: default
Author: Pekka Klärck
Date: Tue Oct 23 13:01:36 2012
Log: parsing: Consider non-breaking space as normal space when parsing
TSV and TXT.
Update issue 1264
Status: Started
Owner: pekka.klarck
Labels: Target-2.7.5, bwic, ackn
Applied the patch Eemeli and I created as-is otherwise but fixed stripping
possible BOM.
Still need to handle HTML format and update User Guide accordingly.
http://code.google.com/p/robotframework/source/detail?r=b9528ed87b97
Added:
/atest/robot/parsing/non_breaking_space.txt
/atest/testdata/parsing/nbsp.tsv
/atest/testdata/parsing/nbsp.txt
Modified:
/src/robot/parsing/tsvreader.py
/src/robot/parsing/txtreader.py
=======================================
--- /dev/null
+++ /atest/robot/parsing/non_breaking_space.txt Tue Oct 23 13:01:36 2012
@@ -0,0 +1,17 @@
+*** Settings ***
+Documentation Regard non-breaking spaces as normal spaces in parsing
+Suite Setup Run Tests ${EMPTY} parsing/nbsp.txt parsing/nbsp.tsv
+Force Tags regression pybot jybot
+Resource atest_resource.txt
+
+
+*** Test Cases ***
+
+Non-breaking spaces in plain text file
+ Check test case ${TESTNAME}
+
+Non-breaking spaces in plain text file with pipes
+ Check test case ${TESTNAME}
+
+Non-breaking spaces in TSV file
+ Check test case ${TESTNAME}
=======================================
--- /dev/null
+++ /atest/testdata/parsing/nbsp.tsv Tue Oct 23 13:01:36 2012
@@ -0,0 +1,3 @@
+*** Test cases ***
+Non-breaking spaces in TSV file
+ Should Be Equal NBSPs only in first
NBSPs only in first
=======================================
--- /dev/null
+++ /atest/testdata/parsing/nbsp.txt Tue Oct 23 13:01:36 2012
@@ -0,0 +1,6 @@
+*** Test cases ***
+Non-breaking spaces in plain text file
+ Should Be Equal NBSPs only in first NBSPs only in first
+
+| Non-breaking spaces in plain text file with pipes |
+| | Should Be Equal | NBSPs only in first | NBSPs only in first
=======================================
--- /src/robot/parsing/tsvreader.py Tue Mar 6 00:46:30 2012
+++ /src/robot/parsing/tsvreader.py Tue Oct 23 13:01:36 2012
@@ -15,13 +15,15 @@
from codecs import BOM_UTF8
+NBSP = u'\xA0'
+
+
class TsvReader:
def read(self, tsvfile, populator):
process = False
for index, row in enumerate(tsvfile.readlines()):
- if index == 0 and row.startswith(BOM_UTF8):
- row = row[len(BOM_UTF8):]
+ row = self._decode_row(row, index == 0)
cells = [self._process(cell) for cell in self.split_row(row)]
name = cells and cells[0].strip() or ''
if name.startswith('*') and \
@@ -31,6 +33,14 @@
populator.add(cells)
populator.eof()
+ def _decode_row(self, row, is_first):
+ if is_first and row.startswith(BOM_UTF8):
+ row = row[len(BOM_UTF8):]
+ row = row.decode('UTF-8')
+ if NBSP in row:
+ row = row.replace(NBSP, ' ')
+ return row
+
@classmethod
def split_row(cls, row):
return row.rstrip().split('\t')
@@ -38,4 +48,4 @@
def _process(self, cell):
if len(cell) > 1 and cell[0] == cell[-1] == '"':
cell = cell[1:-1].replace('""','"')
- return cell.decode('UTF-8')
+ return cell
=======================================
--- /src/robot/parsing/txtreader.py Tue Mar 6 00:46:30 2012
+++ /src/robot/parsing/txtreader.py Tue Oct 23 13:01:36 2012
@@ -30,4 +30,4 @@
return [cell.strip() for cell in cls._pipe_splitter.split(row)]
def _process(self, cell):
- return cell.decode('UTF-8')
+ return cell
==============================================================================
Revision: e267f4f982f7
Branch: default
Author: Pekka Klärck
Date: Tue Oct 23 13:19:14 2012
Log: parsing: Consider NBSP as normal space also when parsing HTML.
Update issue 1264
Now HTML is handled the same way as TXT and TSV.
Noticed that HtmlReader could be cleaned up a little (and possibly
performance enhanced at the same time) but that's not related to this issue.
http://code.google.com/p/robotframework/source/detail?r=e267f4f982f7
Added:
/atest/testdata/parsing/nbsp.html
Modified:
/atest/robot/parsing/non_breaking_space.txt
/src/robot/parsing/htmlreader.py
=======================================
--- /dev/null
+++ /atest/testdata/parsing/nbsp.html Tue Oct 23 13:19:14 2012
@@ -0,0 +1,22 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<title>NBSP</title>
+</head>
+<body>
+<h1>NBSP</h1>
+<table border="1">
+<tr>
+<th colspan="5"> Test cases </th>
+</tr>
+<tr>
+<td> Non-breaking spaces in HTML file </td>
+<td> Should Be Equal </td>
+<td> NBSPs only in first </td>
+<td> NBSPs only in first </td>
+<td></td>
+</tr>
+</table>
+</body>
+</html>
=======================================
--- /atest/robot/parsing/non_breaking_space.txt Tue Oct 23 13:01:36 2012
+++ /atest/robot/parsing/non_breaking_space.txt Tue Oct 23 13:19:14 2012
@@ -1,6 +1,6 @@
*** Settings ***
Documentation Regard non-breaking spaces as normal spaces in parsing
-Suite Setup Run Tests ${EMPTY} parsing/nbsp.txt parsing/nbsp.tsv
+Suite Setup Run Tests ${EMPTY} parsing/nbsp.*
Force Tags regression pybot jybot
Resource atest_resource.txt
@@ -15,3 +15,6 @@
Non-breaking spaces in TSV file
Check test case ${TESTNAME}
+
+Non-breaking spaces in HTML file
+ Check test case ${TESTNAME}
=======================================
--- /src/robot/parsing/htmlreader.py Tue Mar 6 00:46:30 2012
+++ /src/robot/parsing/htmlreader.py Tue Oct 23 13:19:14 2012
@@ -18,6 +18,7 @@
from htmlentitydefs import entitydefs
extra_entitydefs = {'nbsp': ' ', 'apos': "'", 'tilde': '~'}
+NON_BREAKING_SPACE = u'\xA0'
class HtmlReader(HTMLParser.HTMLParser):
@@ -67,6 +68,8 @@
return
if decode:
data = data.decode(self._encoding)
+ if NON_BREAKING_SPACE in data:
+ data = data.replace(NON_BREAKING_SPACE, ' ')
self.current_cell.append(data)
def handle_entityref(self, name):
==============================================================================
Revision: 01eac700dbe5
Branch: default
Author: Pekka Klärck
Date: Tue Oct 23 14:20:22 2012
Log: htmlreader: Refactored the code and fixed handling &tilde;
Update issue 1265
Status: Done
Fixed.
http://code.google.com/p/robotframework/source/detail?r=01eac700dbe5
Modified:
/atest/robot/parsing/html_entityrefs.txt
/atest/testdata/parsing/html_entityrefs.html
/atest/testdata/parsing/html_entityrefs_variables.py
/src/robot/parsing/htmlreader.py
=======================================
--- /atest/robot/parsing/html_entityrefs.txt Wed Aug 31 12:24:58 2011
+++ /atest/robot/parsing/html_entityrefs.txt Tue Oct 23 14:20:22 2012
@@ -6,18 +6,18 @@
*** Test Cases ***
Scandinavian Letters
- Check Test Case Scandinavian Letters
+ Check Test Case ${TEST NAME}
XML Escapes
- Check Test Case XML Escapes
+ Check Test Case ${TEST NAME}
Other Escapes
- Check Test Case Other Escapes
+ Check Test Case ${TEST NAME}
Numerical Escapes
[Documentation] These are character references
- Check Test Case Numerical Escapes
+ Check Test Case ${TEST NAME}
Variables using escapes
- Check Test Case Variables using escapes
+ Check Test Case ${TEST NAME}
=======================================
--- /atest/testdata/parsing/html_entityrefs.html Tue Oct 18 02:22:17 2011
+++ /atest/testdata/parsing/html_entityrefs.html Tue Oct 23 14:20:22 2012
@@ -150,8 +150,7 @@
<tr>
<td>XML Escapes</td>
<td>Should Be Equal</td>
-<td>& &amp; < <tag>
-> ' " '" ' &gt;<br></td>
+<td>& &amp; <tag> ' " &gt;<br></td>
<td>${XML ESCAPES}</td>
<td></td>
</tr>
@@ -165,7 +164,7 @@
<tr>
<td>Other Escapes</td>
<td>Should Be Equal</td>
-<td>§xxx§ ˜ ' "</td>
+<td>§xxx ˜ ~</td>
<td>${OTHER ESCAPES}</td>
<td></td>
</tr>
@@ -219,32 +218,6 @@
<td></td>
<td></td>
</tr>
-</tbody>
-</table>
-<table border="1">
-<colgroup span="99"><col class="name"><col class="action"><col class="arg"
span="3"></colgroup>
-<thead> <tr>
-<th>Keyword</th>
-<th>Action</th>
-<th>Argument</th>
-<th>Argument</th>
-<th>Argument</th>
-</tr>
-</thead> <tbody>
-<tr>
-<td></td>
-<td></td>
-<td></td>
-<td></td>
-<td></td>
-</tr>
-<tr>
-<td></td>
-<td></td>
-<td></td>
-<td></td>
-<td></td>
-</tr>
</tbody>
</table>
</body></html>
=======================================
--- /atest/testdata/parsing/html_entityrefs_variables.py Wed Aug 31
12:31:53 2011
+++ /atest/testdata/parsing/html_entityrefs_variables.py Tue Oct 23
14:20:22 2012
@@ -1,3 +1,3 @@
-scandinavian_letters = u'Hyv\u00E4\u00E4 \u00FC\u00F6t\u00E4
\u00C5\u00C4\u00D6'
-xml_escapes = '''& &amp; < <tag> > ' " '" ' &gt;'''
-other_escapes = u'\u00A7xxx\u00A7 \u007E \' "'
+scandinavian_letters = u'Hyv\xE4\xE4 \xFC\xF6t\xE4 \xC5\xC4\xD6'
+xml_escapes = '''& &amp; <tag> ' " &gt;'''
+other_escapes = u'''\xA7xxx \u02DC ~'''
=======================================
--- /src/robot/parsing/htmlreader.py Tue Oct 23 13:19:14 2012
+++ /src/robot/parsing/htmlreader.py Tue Oct 23 14:20:22 2012
@@ -14,10 +14,9 @@
import HTMLParser
-import sys
from htmlentitydefs import entitydefs
-extra_entitydefs = {'nbsp': ' ', 'apos': "'", 'tilde': '~'}
+
NON_BREAKING_SPACE = u'\xA0'
@@ -29,16 +28,16 @@
def __init__(self):
HTMLParser.HTMLParser.__init__(self)
self._encoding = 'ISO-8859-1'
- self._handlers = { 'table_start' : self.table_start,
- 'table_end' : self.table_end,
- 'tr_start' : self.tr_start,
- 'tr_end' : self.tr_end,
- 'td_start' : self.td_start,
- 'td_end' : self.td_end,
- 'th_start' : self.td_start,
- 'th_end' : self.td_end,
- 'br_start' : self.br_start,
- 'meta_start' : self.meta_start }
+ self._handlers = {'table_start' : self.table_start,
+ 'table_end' : self.table_end,
+ 'tr_start' : self.tr_start,
+ 'tr_end' : self.tr_end,
+ 'td_start' : self.td_start,
+ 'td_end' : self.td_end,
+ 'th_start' : self.td_start,
+ 'th_end' : self.td_end,
+ 'br_start' : self.br_start,
+ 'meta_start' : self.meta_start}
def read(self, htmlfile, populator):
self.populator = populator
@@ -46,13 +45,16 @@
self.current_row = None
self.current_cell = None
for line in htmlfile.readlines():
- self.feed(line)
+ self.feed(self._decode(line))
# Calling close is required by the HTMLParser but may cause
problems
# if the same instance of our HtmlParser is reused. Currently it's
# used only once so there's no problem.
self.close()
self.populator.eof()
+ def _decode(self, line):
+ return line.decode(self._encoding)
+
def handle_starttag(self, tag, attrs):
handler = self._handlers.get(tag+'_start')
if handler is not None:
@@ -63,22 +65,20 @@
if handler is not None:
handler()
- def handle_data(self, data, decode=True):
+ def handle_data(self, data):
if self.state == self.IGNORE or self.current_cell is None:
return
- if decode:
- data = data.decode(self._encoding)
if NON_BREAKING_SPACE in data:
data = data.replace(NON_BREAKING_SPACE, ' ')
self.current_cell.append(data)
def handle_entityref(self, name):
value = self._handle_entityref(name)
- self.handle_data(value, decode=False)
+ self.handle_data(value)
def _handle_entityref(self, name):
- if extra_entitydefs.has_key(name):
- return extra_entitydefs[name]
+ if name == 'apos': # missing from entitydefs
+ return "'"
try:
value = entitydefs[name]
except KeyError:
@@ -89,12 +89,12 @@
def handle_charref(self, number):
value = self._handle_charref(number)
- self.handle_data(value, decode=False)
+ self.handle_data(value)
def _handle_charref(self, number):
- if number.lower().startswith('x'):
+ if number.startswith(('x', 'X')):
+ base = 16
number = number[1:]
- base = 16
else:
base = 10
try:
@@ -133,13 +133,8 @@
if self.current_cell is not None:
self.td_end()
if self.state == self.INITIAL:
- if len(self.current_row) > 0:
- if self.populator.start_table(self.current_row):
- self.state = self.PROCESS
- else:
- self.state = self.IGNORE
- else:
- self.state = self.IGNORE
+ accepted = self.populator.start_table(self.current_row)
+ self.state = self.PROCESS if accepted else self.IGNORE
elif self.state == self.PROCESS:
self.populator.add(self.current_row)
self.current_row = None
@@ -158,8 +153,7 @@
self.current_cell = None
def br_start(self, attrs=None):
- if self.current_cell is not None and self.state != self.IGNORE:
- self.current_cell.append('\n')
+ self.handle_data('\n')
def meta_start(self, attrs):
encoding = self._get_encoding_from_meta(attrs)
@@ -178,7 +172,7 @@
token = token.strip()
if token.lower().startswith('charset='):
encoding = token[8:]
- return valid_http_equiv and encoding or None
+ return encoding if valid_http_equiv else None
def _get_encoding_from_pi(self, data):
data = data.strip()
@@ -193,18 +187,3 @@
encoding = encoding[1:-1]
return encoding
return None
-
-
-# Workaround for following bug in Python 2.6:
http://bugs.python.org/issue3932
-if sys.version_info[:2] > (2, 5):
- def unescape_from_py25(self, s):
- if '&' not in s:
- return s
- s = s.replace("&lt;", "<")
- s = s.replace("&gt;", ">")
- s = s.replace("&apos;", "'")
- s = s.replace("&quot;", '"')
- s = s.replace("&amp;", "&") # Must be last
- return s
-
- HTMLParser.HTMLParser.unescape = unescape_from_py25