Re: [Zope] Using tal:attributes in XML with non-ASCII characters

Dieter Maurer Mon, 20 Mar 2006 10:50:30 -0800

Andrew Veitch wrote at 2006-3-20 01:53 +0000:
> ...
>     <input name="blah" type="text" tal:attributes="value python:chr 
>(200).encode('utf-8')" />
>This gives:
>
>Error Type: UnicodeDecodeError
>Error Value: 'ascii' codec can't decode byte 0x80 in position 0:  
>ordinal not in range(128)


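Sure, you are using "str.encode" in the wrong way:

  "str.encode('utf-8')" is equivalent to
  "unicode(str, getdefaultencoding()).encode('utf-8')".

  The implicit decoding step (default encoding: usually "ascii") is
  what raises the "UnicodeDecodeError" for non-ASCII bytes.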


What encoding should your "200" use?

   Convert it to unicode using this encoding (and let the
   ZPublisher convert the unicode to "utf-8").


By the way, your exception must come from somewhere else
as "chr(200)" cannot lead to a "byte 0x80".

It is always worth looking at the traceback. It tells you
where the exception really comes from...

>...
><input name="blah" type="text" tal:attributes="value python:chr(200)" />
>
>Then this will work in HTML mode but will fail in XML mode.

You should use Unicode in XML mode...

> ...
>> I could provide patches, if useful.
>
>I would be very interested to see your patches.

Attached.

--- TALDefs.py	2005-08-17 10:48:18.000000000 +0200
+++ /home/dieter/Z/Base/lib/python/TAL/TALDefs.py	2005-11-12 09:29:03.000000000 +0100
@@ -111,8 +111,15 @@
 
 
 import re
-_attr_re = re.compile(r"\s*([^\s]+)\s+([^\s].*)\Z", re.S)
-_subst_re = re.compile(r"\s*(?:(text|structure)\s+)?(.*)\Z", re.S)
+# DM 2005-11-12: support "mltext" (Markup Language text)
+#   as additional quote type. It corresponds to the SGML "RCDATA"
+#   (Replacable Character Data) which may contain entity references
+#   but no other markup. Correspondingly, "mltext" quotes '<' but
+#   leaves alone character entities.
+#_attr_re = re.compile(r"\s*([^\s]+)\s+(?:(text|structure)\s+)?([^\s].*)\Z", re.S)
+#_subst_re = re.compile(r"\s*(?:(text|structure)\s+)?(.*)\Z", re.S)
+_attr_re = re.compile(r"\s*([^\s]+)\s+(?:(text|structure|mltext)\s+)?([^\s].*)\Z", re.S)
+_subst_re = re.compile(r"\s*(?:(text|structure|mltext)\s+)?(.*)\Z", re.S)
 del re
 
 def parseAttributeReplacements(arg, xml):
@@ -121,12 +128,12 @@
         m = _attr_re.match(part)
         if not m:
             raise TALError("Bad syntax in attributes: " + `part`)
-        name, expr = m.group(1, 2)
+        name, quote_type, expr = m.group(1, 2, 3)
         if not xml:
             name = name.lower()
         if dict.has_key(name):
             raise TALError("Duplicate attribute name in attributes: " + `part`)
-        dict[name] = expr
+        dict[name] = quote_type, expr
     return dict
 
 def parseSubstitution(arg, position=(None, None)):
--- TALGenerator.py	2005-08-17 10:48:18.000000000 +0200
+++ /home/dieter/Z/Base/lib/python/TAL/TALGenerator.py	2005-11-12 10:52:29.000000000 +0100
@@ -284,6 +284,9 @@
         cexpr = self.compileExpression(expr)
         if key == "text":
             self.emit("insertText", cexpr, [])
+        # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+        elif key == "mltext":
+            self.emit("insertMLText", cexpr, [])
         else:
             assert key == "structure"
             self.emit("insertStructure", cexpr, {}, [])
@@ -315,6 +318,9 @@
         program = self.popProgram()
         if key == "text":
             self.emit("insertText", cexpr, program)
+        # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+        elif key == "mltext":
+            self.emit("insertMLText", cexpr, program)
         else:
             assert key == "structure"
             self.emit("insertStructure", cexpr, attrDict, program)
@@ -352,8 +358,11 @@
             assert action == I18N_EXPRESSION
             key, expr = parseSubstitution(expression)
             cexpr = self.compileExpression(expr)
+        # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+        #self.emit('i18nVariable',
+        #          varname, program, cexpr, int(key == "structure"))
         self.emit('i18nVariable',
-                  varname, program, cexpr, int(key == "structure"))
+                  varname, program, cexpr, key or 'text')
 
     def emitTranslation(self, msgid, i18ndata):
         program = self.popProgram()
@@ -464,13 +473,13 @@
         for item in attrlist:
             key = item[0]
             if repldict.has_key(key):
-                expr, xlat, msgid = repldict[key]
-                item = item[:2] + ("replace", expr, xlat, msgid)
+                expr, quote_type, xlat, msgid = repldict[key]
+                item = item[:2] + ("replace", expr, quote_type, xlat, msgid)
                 del repldict[key]
             newlist.append(item)
         # Add dynamic-only attributes
-        for key, (expr, xlat, msgid) in repldict.items():
-            newlist.append((key, None, "insert", expr, xlat, msgid))
+        for key, (expr, quote_type, xlat, msgid) in repldict.items():
+            newlist.append((key, None, "insert", expr, quote_type, xlat, msgid))
         return newlist
 
     def emitStartElement(self, name, attrlist, taldict, metaldict, i18ndict,
@@ -675,17 +684,17 @@
                 i18nattrs = {}
             # Convert repldict's name-->expr mapping to a
             # name-->(compiled_expr, translate) mapping
-            for key, value in repldict.items():
+            for key, (quote_type, value) in repldict.items():
                 if i18nattrs.get(key, None):
                     raise I18NError(
                       ("attribute [%s] cannot both be part of tal:attributes" +
                       " and have a msgid in i18n:attributes") % key,
                     position)
                 ce = self.compileExpression(value)
-                repldict[key] = ce, key in i18nattrs, i18nattrs.get(key)
+                repldict[key] = ce, quote_type, key in i18nattrs, i18nattrs.get(key)
             for key in i18nattrs:
                 if not repldict.has_key(key):
-                    repldict[key] = None, 1, i18nattrs.get(key)
+                    repldict[key] = None, None, 1, i18nattrs.get(key)
         else:
             repldict = {}
         if replace:
@@ -783,7 +792,7 @@
             #   - I18N_EXPRESSION for explicit tal:replace
             # o varname[2] will be None for the first two actions and the
             #   replacement tal expression for the third action.  This
-            #   can include a 'text' or 'structure' indicator.
+            #   can include a 'text' or 'structure' or 'mltext' (DM) indicator.
             assert (varname[1]
                     in [I18N_REPLACE, I18N_CONTENT, I18N_EXPRESSION])
             self.emitI18nVariable(varname)
--- TALInterpreter.py	2005-08-17 10:48:18.000000000 +0200
+++ /home/dieter/Z/Base/lib/python/TAL/TALInterpreter.py	2005-12-29 16:07:20.000000000 +0100
@@ -13,7 +13,7 @@
 ##############################################################################
 """Interpreter for a pre-compiled TAL program.
 
-$Id: TALInterpreter.py 37696 2005-08-04 14:22:37Z yuppie $
+$Id$
 """
 import cgi
 import sys
@@ -360,7 +360,7 @@
     def attrAction_tal(self, item):
         name, value, action = item[:3]
         ok = 1
-        expr, xlat, msgid = item[3:]
+        expr, quote_type, xlat, msgid = item[3:]
         if self.html and name.lower() in BOOLEAN_HTML_ATTRS:
             evalue = self.engine.evaluateBoolean(item[3])
             if evalue is self.Default:
@@ -384,7 +384,14 @@
 
         if ok:
             if xlat:
-                translated = self.translate(msgid or value, value, {})
+                # for text/xml we need to use the utranslate() method
+                # since attribute names will be unicode string causing
+                # trouble in line 410 
+                if self.html:
+                    translated = self.translate(msgid or value, value, {})
+                else:
+                    translated = self.utranslate(msgid or value, value, {})
+
                 if translated is not None:
                     value = translated
             if value is None:
@@ -392,7 +399,14 @@
             elif evalue is self.Default:
                 value = attrEscape(value)
             else:
-                value = escape(value, quote=1)
+                if quote_type == 'structure':
+                    value = value.replace('"','&quot;')
+                # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+                elif quote_type == 'mltext':
+                    value = value.replace('"','&quot;').replace('<','&lt;')
+                else:
+                    value = escape(value, quote=1)
+
             value = '%s="%s"' % (name, value)
         return ok, name, value
     bytecode_handlers["<attrAction>"] = attrAction
@@ -497,7 +511,9 @@
     def do_insertText(self, stuff):
         self.interpret(stuff[1])
 
-    def do_insertText_tal(self, stuff):
+    # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+    #def do_insertText_tal(self, stuff):
+    def do_insertText_tal(self, stuff, escape=escape):
         text = self.engine.evaluateText(stuff[0])
         if text is None:
             return
@@ -516,8 +532,16 @@
             self.col = len(s) - (i + 1)
     bytecode_handlers["insertText"] = do_insertText
 
+    # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+    def do_insertMLText_tal(self, stuff, escape=lambda s: s.replace('<','&lt;')):
+        return self.do_insertText_tal(stuff, escape)
+    bytecode_handlers["insertMLText"] = do_insertText
+
     def do_i18nVariable(self, stuff):
-        varname, program, expression, structure = stuff
+        # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+        #  Note: 'stuff' no longer contains 'structure' but the quote type
+        #varname, program, expression, structure = stuff
+        varname, program, expression, quote_type = stuff
         if expression is None:
             # The value is implicitly the contents of this tag, so we have to
             # evaluate the mini-program to get the value of the variable.
@@ -534,7 +558,10 @@
         else:
             # Evaluate the value to be associated with the variable in the
             # i18n interpolation dictionary.
-            if structure:
+            # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+            #   Note: 'structure' replaced by 'quote_type'
+            #if structure:
+            if quote_type == 'structure':
                 value = self.engine.evaluateStructure(expression)
             else:
                 value = self.engine.evaluate(expression)
@@ -545,7 +572,12 @@
                 value = self.engine.translate(value.domain, value,
                                               value.mapping)
 
-            if not structure:
+            # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+            #   Note: 'structure' replaced by 'quote_type'
+            #if not structure:
+            if quote_type == 'mltext':
+                value = value.replace('<', '&lt;')
+            elif quote_type != 'structure':
                 value = cgi.escape(ustr(value))
 
         # Either the i18n:name tag is nested inside an i18n:translate in which
@@ -660,6 +692,15 @@
         return self.engine.translate(self.i18nContext.domain,
                                      msgid, i18ndict, default=default)
 
+    def utranslate(self, msgid, default, i18ndict, obj=None):
+        if obj:
+            i18ndict.update(obj)
+        if not self.i18nInterpolate:
+            return msgid
+        # XXX We need to pass in one of context or target_language
+        return self.engine.utranslate(self.i18nContext.domain,
+                                     msgid, i18ndict, default=default)
+
     def do_rawtextColumn(self, (s, col)):
         self._stream_write(s)
         self.col = col
@@ -733,7 +774,14 @@
                 if self.sourceFile != prev_source:
                     self.engine.setSourceFile(prev_source)
                     self.sourceFile = prev_source
-                self.pushMacro(macroName, slots, entering=0)
+                # DM: leads to really strange behaviour when macro
+                #   definitions are nested
+                #   Furthermore, it is unclear why a defined slot
+                #   should change the nature.
+                # self.pushMacro(macroName, slots, entering=0)
+                self.pushMacro(macroName, slots,
+                               #entering=0,
+                               )
                 return
             self.pushMacro(macroName, slots)
             # Falling out of the 'if' allows the macro to be interpreted.
@@ -774,6 +822,8 @@
     bytecode_handlers_tal["setGlobal"] = do_setGlobal_tal
     bytecode_handlers_tal["insertStructure"] = do_insertStructure_tal
     bytecode_handlers_tal["insertText"] = do_insertText_tal
+    # DM 2005-11-12: 'mltext' support (see 'TALDefs' for details)
+    bytecode_handlers_tal["insertMLText"] = do_insertMLText_tal
     bytecode_handlers_tal["loop"] = do_loop_tal
     bytecode_handlers_tal["onError"] = do_onError_tal
     bytecode_handlers_tal["<attrAction>"] = attrAction_tal

-- 
Dieter

_______________________________________________
Zope maillist  -  [email protected]
http://mail.zope.org/mailman/listinfo/zope
**   No cross posts or HTML encoding!  **
(Related lists - 
 http://mail.zope.org/mailman/listinfo/zope-announce
 http://mail.zope.org/mailman/listinfo/zope-dev )

Re: [Zope] Using tal:attributes in XML with non-ASCII characters

Reply via email to