Andrew Veitch wrote at 2006-3-20 01:53 +0000:
> ...
> <input name="blah" type="text" tal:attributes="value python:chr
>(200).encode('utf-8')" />
>This gives:
>
>Error Type: UnicodeDecodeError
>Error Value: 'ascii' codec can't decode byte 0x80 in position 0:
>ordinal not in range(128)
Sure, you are using "str.encode" the wrong way:
"str.encode('utf-8')" is equivalent to
"unicode(str, getdefaultencoding()).encode('utf-8')".
What encoding should your "200" use?
Convert it to unicode using this encoding (and let the
ZPublisher convert the unicode to "utf-8").
By the way, your exception must come from somewhere else
as "chr(200)" cannot lead to a "byte 0x80".
It is always worth looking at the traceback. It tells you
where the exception really comes from...
>...
><input name="blah" type="text" tal:attributes="value python:chr(200)" />
>
>Then this will work in HTML mode but will fail in XML mode.
You should use Unicode in XML mode...
> ...
>> I could provide patches, if useful.
>
>I would be very interested to see your patches.
Attached.
--- TALDefs.py 2005-08-17 10:48:18.000000000 +0200
+++ /home/dieter/Z/Base/lib/python/TAL/TALDefs.py 2005-11-12 09:29:03.000000000 +0100
@@ -111,8 +111,15 @@
import re
-_attr_re = re.compile(r"\s*([^\s]+)\s+([^\s].*)\Z", re.S)
-_subst_re = re.compile(r"\s*(?:(text|structure)\s+)?(.*)\Z", re.S)
+# DM 2005-11-12: support "mltext" (Markup Language text)
+# as additional quote type. It corresponds to the SGML "RCDATA"
+# (Replaceable Character Data) which may contain entity references
+# but no other markup. Correspondingly, "mltext" quotes '<' but
+# leaves alone character entities.
+#_attr_re = re.compile(r"\s*([^\s]+)\s+(?:(text|structure)\s+)?([^\s].*)\Z", re.S)
+#_subst_re = re.compile(r"\s*(?:(text|structure)\s+)?(.*)\Z", re.S)
+_attr_re = re.compile(r"\s*([^\s]+)\s+(?:(text|structure|mltext)\s+)?([^\s].*)\Z", re.S)
+_subst_re = re.compile(r"\s*(?:(text|structure|mltext)\s+)?(.*)\Z", re.S)
del re
def parseAttributeReplacements(arg, xml):
@@ -121,12 +128,12 @@
m = _attr_re.match(part)
if not m:
raise TALError("Bad syntax in attributes: " + `part`)
- name, expr = m.group(1, 2)
+ name, quote_type, expr = m.group(1, 2, 3)
if not xml:
name = name.lower()
if dict.has_key(name):
raise TALError("Duplicate attribute name in attributes: " + `part`)
- dict[name] = expr
+ dict[name] = quote_type, expr
return dict
def parseSubstitution(arg, position=(None, None)):
--- TALGenerator.py 2005-08-17 10:48:18.000000000 +0200
+++ /home/dieter/Z/Base/lib/python/TAL/TALGenerator.py 2005-11-12 10:52:29.000000000 +0100
@@ -284,6 +284,9 @@
cexpr = self.compileExpression(expr)
if key == "text":
self.emit("insertText", cexpr, [])
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ elif key == "mltext":
+ self.emit("insertMLText", cexpr, [])
else:
assert key == "structure"
self.emit("insertStructure", cexpr, {}, [])
@@ -315,6 +318,9 @@
program = self.popProgram()
if key == "text":
self.emit("insertText", cexpr, program)
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ elif key == "mltext":
+ self.emit("insertMLText", cexpr, program)
else:
assert key == "structure"
self.emit("insertStructure", cexpr, attrDict, program)
@@ -352,8 +358,11 @@
assert action == I18N_EXPRESSION
key, expr = parseSubstitution(expression)
cexpr = self.compileExpression(expr)
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ #self.emit('i18nVariable',
+ # varname, program, cexpr, int(key == "structure"))
self.emit('i18nVariable',
- varname, program, cexpr, int(key == "structure"))
+ varname, program, cexpr, key or 'text')
def emitTranslation(self, msgid, i18ndata):
program = self.popProgram()
@@ -464,13 +473,13 @@
for item in attrlist:
key = item[0]
if repldict.has_key(key):
- expr, xlat, msgid = repldict[key]
- item = item[:2] + ("replace", expr, xlat, msgid)
+ expr, quote_type, xlat, msgid = repldict[key]
+ item = item[:2] + ("replace", expr, quote_type, xlat, msgid)
del repldict[key]
newlist.append(item)
# Add dynamic-only attributes
- for key, (expr, xlat, msgid) in repldict.items():
- newlist.append((key, None, "insert", expr, xlat, msgid))
+ for key, (expr, quote_type, xlat, msgid) in repldict.items():
+ newlist.append((key, None, "insert", expr, quote_type, xlat, msgid))
return newlist
def emitStartElement(self, name, attrlist, taldict, metaldict, i18ndict,
@@ -675,17 +684,17 @@
i18nattrs = {}
# Convert repldict's name-->expr mapping to a
# name-->(compiled_expr, translate) mapping
- for key, value in repldict.items():
+ for key, (quote_type, value) in repldict.items():
if i18nattrs.get(key, None):
raise I18NError(
("attribute [%s] cannot both be part of tal:attributes" +
" and have a msgid in i18n:attributes") % key,
position)
ce = self.compileExpression(value)
- repldict[key] = ce, key in i18nattrs, i18nattrs.get(key)
+ repldict[key] = ce, quote_type, key in i18nattrs, i18nattrs.get(key)
for key in i18nattrs:
if not repldict.has_key(key):
- repldict[key] = None, 1, i18nattrs.get(key)
+ repldict[key] = None, None, 1, i18nattrs.get(key)
else:
repldict = {}
if replace:
@@ -783,7 +792,7 @@
# - I18N_EXPRESSION for explicit tal:replace
# o varname[2] will be None for the first two actions and the
# replacement tal expression for the third action. This
- # can include a 'text' or 'structure' indicator.
+ # can include a 'text' or 'structure' or 'mltext' (DM) indicator.
assert (varname[1]
in [I18N_REPLACE, I18N_CONTENT, I18N_EXPRESSION])
self.emitI18nVariable(varname)
--- TALInterpreter.py 2005-08-17 10:48:18.000000000 +0200
+++ /home/dieter/Z/Base/lib/python/TAL/TALInterpreter.py 2005-12-29 16:07:20.000000000 +0100
@@ -13,7 +13,7 @@
##############################################################################
"""Interpreter for a pre-compiled TAL program.
-$Id: TALInterpreter.py 37696 2005-08-04 14:22:37Z yuppie $
+$Id$
"""
import cgi
import sys
@@ -360,7 +360,7 @@
def attrAction_tal(self, item):
name, value, action = item[:3]
ok = 1
- expr, xlat, msgid = item[3:]
+ expr, quote_type, xlat, msgid = item[3:]
if self.html and name.lower() in BOOLEAN_HTML_ATTRS:
evalue = self.engine.evaluateBoolean(item[3])
if evalue is self.Default:
@@ -384,7 +384,14 @@
if ok:
if xlat:
- translated = self.translate(msgid or value, value, {})
+ # for text/xml we need to use the utranslate() method
+ # since attribute names will be unicode string causing
+ # trouble in line 410
+ if self.html:
+ translated = self.translate(msgid or value, value, {})
+ else:
+ translated = self.utranslate(msgid or value, value, {})
+
if translated is not None:
value = translated
if value is None:
@@ -392,7 +399,14 @@
elif evalue is self.Default:
value = attrEscape(value)
else:
- value = escape(value, quote=1)
+ if quote_type == 'structure':
+ value = value.replace('"','&quot;')
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ elif quote_type == 'mltext':
+ value = value.replace('"','&quot;').replace('<','&lt;')
+ else:
+ value = escape(value, quote=1)
+
value = '%s="%s"' % (name, value)
return ok, name, value
bytecode_handlers["<attrAction>"] = attrAction
@@ -497,7 +511,9 @@
def do_insertText(self, stuff):
self.interpret(stuff[1])
- def do_insertText_tal(self, stuff):
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ #def do_insertText_tal(self, stuff):
+ def do_insertText_tal(self, stuff, escape=escape):
text = self.engine.evaluateText(stuff[0])
if text is None:
return
@@ -516,8 +532,16 @@
self.col = len(s) - (i + 1)
bytecode_handlers["insertText"] = do_insertText
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ def do_insertMLText_tal(self, stuff, escape=lambda s: s.replace('<','&lt;')):
+ return self.do_insertText_tal(stuff, escape)
+ bytecode_handlers["insertMLText"] = do_insertText
+
def do_i18nVariable(self, stuff):
- varname, program, expression, structure = stuff
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ # Note: 'stuff' no longer contains 'structure' but the quote type
+ #varname, program, expression, structure = stuff
+ varname, program, expression, quote_type = stuff
if expression is None:
# The value is implicitly the contents of this tag, so we have to
# evaluate the mini-program to get the value of the variable.
@@ -534,7 +558,10 @@
else:
# Evaluate the value to be associated with the variable in the
# i18n interpolation dictionary.
- if structure:
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ # Note: 'structure' replaced by 'quote_type'
+ #if structure:
+ if quote_type == 'structure':
value = self.engine.evaluateStructure(expression)
else:
value = self.engine.evaluate(expression)
@@ -545,7 +572,12 @@
value = self.engine.translate(value.domain, value,
value.mapping)
- if not structure:
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ # Note: 'structure' replaced by 'quote_type'
+ #if not structure:
+ if quote_type == 'mltext':
+ value = value.replace('<', '&lt;')
+ elif quote_type != 'structure':
value = cgi.escape(ustr(value))
# Either the i18n:name tag is nested inside an i18n:translate in which
@@ -660,6 +692,15 @@
return self.engine.translate(self.i18nContext.domain,
msgid, i18ndict, default=default)
+ def utranslate(self, msgid, default, i18ndict, obj=None):
+ if obj:
+ i18ndict.update(obj)
+ if not self.i18nInterpolate:
+ return msgid
+ # XXX We need to pass in one of context or target_language
+ return self.engine.utranslate(self.i18nContext.domain,
+ msgid, i18ndict, default=default)
+
def do_rawtextColumn(self, (s, col)):
self._stream_write(s)
self.col = col
@@ -733,7 +774,14 @@
if self.sourceFile != prev_source:
self.engine.setSourceFile(prev_source)
self.sourceFile = prev_source
- self.pushMacro(macroName, slots, entering=0)
+ # DM: leads to really strange behaviour when macro
+ # definitions are nested
+ # Furthermore, it is unclear why a defined slot
+ # should change the nature.
+ # self.pushMacro(macroName, slots, entering=0)
+ self.pushMacro(macroName, slots,
+ #entering=0,
+ )
return
self.pushMacro(macroName, slots)
# Falling out of the 'if' allows the macro to be interpreted.
@@ -774,6 +822,8 @@
bytecode_handlers_tal["setGlobal"] = do_setGlobal_tal
bytecode_handlers_tal["insertStructure"] = do_insertStructure_tal
bytecode_handlers_tal["insertText"] = do_insertText_tal
+ # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+ bytecode_handlers_tal["insertMLText"] = do_insertMLText_tal
bytecode_handlers_tal["loop"] = do_loop_tal
bytecode_handlers_tal["onError"] = do_onError_tal
bytecode_handlers_tal["<attrAction>"] = attrAction_tal
--
Dieter
_______________________________________________
Zope maillist - [email protected]
http://mail.zope.org/mailman/listinfo/zope
** No cross posts or HTML encoding! **
(Related lists -
http://mail.zope.org/mailman/listinfo/zope-announce
http://mail.zope.org/mailman/listinfo/zope-dev )