Author: brane
Date: Fri Dec  4 16:57:45 2009
New Revision: 887265

URL: http://svn.apache.org/viewvc?rev=887265&view=rev
Log:
Follow up to r887178; replace ASCII control characters with glyphs
from the Control Pictures Unicode range and refactor the escaping funcs.

* tools/dev/gen_junit_report.py
  (xml_encode_map): New escape mapping, extracted from xml_encode.
  (special_encode_map): New escape mapping.
  (replace_from_map): New function. Munge string with an escape mapping.
  (xml_encode): Reimplement with replace_from_map and xml_encode_map.
  (escape_special_characters): Reimplement with replace_from_map
   and special_encode_map.

Modified:
    subversion/trunk/tools/dev/gen_junit_report.py

Modified: subversion/trunk/tools/dev/gen_junit_report.py
URL: http://svn.apache.org/viewvc/subversion/trunk/tools/dev/gen_junit_report.py?rev=887265&r1=887264&r2=887265&view=diff
==============================================================================
--- subversion/trunk/tools/dev/gen_junit_report.py (original)
+++ subversion/trunk/tools/dev/gen_junit_report.py Fri Dec  4 16:57:45 2009
@@ -33,31 +33,64 @@
 import os
 import getopt
 
-ASCII_TABLE = "".join([chr(n) for n in xrange(256)])
-# remove all special characters upto ascii value 31, except line feed (10)
-# and carriage return (13)
-CHARS_TO_REMOVE = ASCII_TABLE[0:10] + ASCII_TABLE[11:13] + ASCII_TABLE[14:32]
+def replace_from_map(data, encode):
+    """replace substrings in DATA with replacements defined in ENCODE"""
+    for pattern, replacement in encode.items():
+        data = data.replace(pattern, replacement)
+    return data
 
-def xml_encode(data):
-    """encode the xml characters in the data"""
-    encode = {
+xml_encode_map = {
       '&': '&amp;',
       '<': '&lt;',
       '>': '&gt;',
       '"': '&quot;',
-      "'": '&apos;'
+      "'": '&apos;',
+      }
+
+def xml_encode(data):
+    """encode the xml characters in the data"""
+    return replace_from_map(data, xml_encode_map)
+
+special_encode_map = {
+    ']]>': ']]]]><![CDATA[>', # CDATA terminator sequence
+    '\000': '&#9216;',        # U+2400 SYMBOL FOR NULL
+    '\001': '&#9217;',        # U+2401 SYMBOL FOR START OF HEADING
+    '\002': '&#9218;',        # U+2402 SYMBOL FOR START OF TEXT
+    '\003': '&#9219;',        # U+2403 SYMBOL FOR END OF TEXT
+    '\004': '&#9220;',        # U+2404 SYMBOL FOR END OF TRANSMISSION
+    '\005': '&#9221;',        # U+2405 SYMBOL FOR ENQUIRY
+    '\006': '&#9222;',        # U+2406 SYMBOL FOR ACKNOWLEDGE
+    '\007': '&#9223;',        # U+2407 SYMBOL FOR BELL
+    '\010': '&#9224;',        # U+2408 SYMBOL FOR BACKSPACE
+    '\011': '&#9225;',        # U+2409 SYMBOL FOR HORIZONTAL TABULATION
+   #'\012': '&#9226;',        # U+240A SYMBOL FOR LINE FEED
+    '\013': '&#9227;',        # U+240B SYMBOL FOR VERTICAL TABULATION
+    '\014': '&#9228;',        # U+240C SYMBOL FOR FORM FEED
+   #'\015': '&#9229;',        # U+240D SYMBOL FOR CARRIAGE RETURN
+    '\016': '&#9230;',        # U+240E SYMBOL FOR SHIFT OUT
+    '\017': '&#9231;',        # U+240F SYMBOL FOR SHIFT IN
+    '\020': '&#9232;',        # U+2410 SYMBOL FOR DATA LINK ESCAPE
+    '\021': '&#9233;',        # U+2411 SYMBOL FOR DEVICE CONTROL ONE
+    '\022': '&#9234;',        # U+2412 SYMBOL FOR DEVICE CONTROL TWO
+    '\023': '&#9235;',        # U+2413 SYMBOL FOR DEVICE CONTROL THREE
+    '\024': '&#9236;',        # U+2414 SYMBOL FOR DEVICE CONTROL FOUR
+    '\025': '&#9237;',        # U+2415 SYMBOL FOR NEGATIVE ACKNOWLEDGE
+    '\026': '&#9238;',        # U+2416 SYMBOL FOR SYNCHRONOUS IDLE
+    '\027': '&#9239;',        # U+2417 SYMBOL FOR END OF TRANSMISSION BLOCK
+    '\030': '&#9240;',        # U+2418 SYMBOL FOR CANCEL
+    '\031': '&#9241;',        # U+2419 SYMBOL FOR END OF MEDIUM
+    '\032': '&#9242;',        # U+241A SYMBOL FOR SUBSTITUTE
+    '\033': '&#9243;',        # U+241B SYMBOL FOR ESCAPE
+    '\034': '&#9244;',        # U+241C SYMBOL FOR FILE SEPARATOR
+    '\035': '&#9245;',        # U+241D SYMBOL FOR GROUP SEPARATOR
+    '\036': '&#9246;',        # U+241E SYMBOL FOR RECORD SEPARATOR
+    '\037': '&#9247;',        # U+241F SYMBOL FOR UNIT SEPARATOR
     }
-    for char in encode.keys():
-        data = data.replace(char, encode[char])
-    return data
 
 def escape_special_characters(data):
     """remove special characters in test failure reasons"""
-    if not data:
-        return data
-    for char in CHARS_TO_REMOVE:
-        data = data.replace(char, '%%%0x' % ord(char))
-    data = data.replace(']]>', ']]]]><![CDATA[>')
+    if data:
+        data = replace_from_map(data, special_encode_map)
     return data
 
 def start_junit():


Reply via email to