morten      01/11/22 05:08:23

  Modified:    java/src/org/apache/xalan/xsltc/compiler Choose.java
                        Parser.java SyntaxTreeNode.java Text.java
               java/src/org/apache/xalan/xsltc/runtime TextOutput.java
  Log:
  Fix for a whole whack of bugs related to text nodes and the handling of
  whitespace and special characters.
  PR:           bugzilla 1403, 1520, 3005, 3418 and 3690
  Obtained from:        n/a
  Submitted by: [EMAIL PROTECTED]
  Reviewed by:  [EMAIL PROTECTED]
  
  Revision  Changes    Path
  1.5       +4 -1      xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Choose.java
  
  Index: Choose.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Choose.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- Choose.java       2001/10/29 11:47:25     1.4
  +++ Choose.java       2001/11/22 13:08:23     1.5
  @@ -1,5 +1,5 @@
   /*
  - * @(#)$Id: Choose.java,v 1.4 2001/10/29 11:47:25 morten Exp $
  + * @(#)$Id: Choose.java,v 1.5 2001/11/22 13:08:23 morten Exp $
    *
    * The Apache Software License, Version 1.1
    *
  @@ -112,6 +112,9 @@
                    error = new ErrorMsg(ErrorMsg.MULTIPLE_OTHERWISE_ERR, this);
                    getParser().reportError(Constants.ERROR, error);
                }
  +         }
  +         else if (element instanceof Text) {
  +             ((Text)element).ignore();
            }
            // It is an error if we find some other element here
            else {
  
  
  
  1.36      +16 -10    xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Parser.java
  
  Index: Parser.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Parser.java,v
  retrieving revision 1.35
  retrieving revision 1.36
  diff -u -r1.35 -r1.36
  --- Parser.java       2001/11/09 15:14:21     1.35
  +++ Parser.java       2001/11/22 13:08:23     1.36
  @@ -1,5 +1,5 @@
   /*
  - * @(#)$Id: Parser.java,v 1.35 2001/11/09 15:14:21 tmiller Exp $
  + * @(#)$Id: Parser.java,v 1.36 2001/11/22 13:08:23 morten Exp $
    *
    * The Apache Software License, Version 1.1
    *
  @@ -1148,23 +1148,29 @@
        String string = new String(ch, start, length);
        SyntaxTreeNode parent = (SyntaxTreeNode)_parentStack.peek();
   
  +     if (string.length() == 0) return;
  +
        // If this text occurs within an <xsl:text> element we append it
        // as-is to the existing text element
        if (parent instanceof Text) {
  -         if (string.length() > 0) {
  -             ((Text)parent).setText(string);
  -         }
  +         ((Text)parent).setText(string);
  +         return;
        }
  +
        // Ignore text nodes that occur directly under <xsl:stylesheet>
  -     else if (parent instanceof Stylesheet) {
  +     if (parent instanceof Stylesheet) return;
   
  -     }
  -     // Add it as a regular text node otherwise
  -     else {
  -         if (string.trim().length() > 0) {
  -             parent.addElement(new Text(string));
  +     SyntaxTreeNode bro = parent.lastChild();
  +     if ((bro != null) && (bro instanceof Text)) {
  +         Text text = (Text)bro;
  +         if (!text.isTextElement()) {
  +             text.setText(string);
  +             return;
            }
        }
  +
  +     // Add it as a regular text node otherwise
  +     parent.addElement(new Text(string));
       }
   
       private String getTokenValue(String token) {
  
  
  
  1.16      +10 -1     xml-xalan/java/src/org/apache/xalan/xsltc/compiler/SyntaxTreeNode.java
  
  Index: SyntaxTreeNode.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/SyntaxTreeNode.java,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- SyntaxTreeNode.java       2001/11/08 10:23:39     1.15
  +++ SyntaxTreeNode.java       2001/11/22 13:08:23     1.16
  @@ -1,5 +1,5 @@
   /*
  - * @(#)$Id: SyntaxTreeNode.java,v 1.15 2001/11/08 10:23:39 morten Exp $
  + * @(#)$Id: SyntaxTreeNode.java,v 1.16 2001/11/22 13:08:23 morten Exp $
    *
    * The Apache Software License, Version 1.1
    *
  @@ -705,6 +705,15 @@
        */
       protected final Object elementAt(int pos) {
        return _contents.elementAt(pos);
  +    }
  +
  +    /**
  +     * Returns this element's last child
  +     * @return The child node.
  +     */
  +    protected final SyntaxTreeNode lastChild() {
  +     if (_contents.size() == 0) return null;
  +     return (SyntaxTreeNode)_contents.lastElement();
       }
   
       /**
  
  
  
  1.8       +32 -5     xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Text.java
  
  Index: Text.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Text.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- Text.java 2001/11/08 10:23:39     1.7
  +++ Text.java 2001/11/22 13:08:23     1.8
  @@ -1,5 +1,5 @@
   /*
  - * @(#)$Id: Text.java,v 1.7 2001/11/08 10:23:39 morten Exp $
  + * @(#)$Id: Text.java,v 1.8 2001/11/22 13:08:23 morten Exp $
    *
    * The Apache Software License, Version 1.1
    *
  @@ -71,13 +71,17 @@
   import org.apache.xalan.xsltc.compiler.util.*;
   
   final class Text extends Instruction {
  +
       private String _text;
       private boolean _escaping = true;
  +    private boolean _ignore = false;
  +    private boolean _textElement = false;
   
       /**
        * Create a blank Text syntax tree node.
        */
       public Text() {
  +     _textElement = true;
       }
   
       /**
  @@ -117,10 +121,33 @@
                
       public void parseContents(Parser parser) {
           final String str = getAttribute("disable-output-escaping");
  -     if ((str != null) && (str.equals("yes"))) {
  -         _escaping = false;
  -     }
  +     if ((str != null) && (str.equals("yes"))) _escaping = false;
  +
        parseChildren(parser);
  +
  +     if (_text == null) {
  +         _ignore = true;
  +     }
  +     else if (_textElement) {
  +         if (_text.length() == 0) _ignore = true;
  +     }
  +     else if (getParent() instanceof LiteralElement) {
  +         LiteralElement element = (LiteralElement)getParent();
  +         String space = element.getAttribute("xml:space");
  +         if ((space == null) || (!space.equals("preserve")))
  +             if (_text.trim().length() == 0) _ignore = true;
  +     }
  +     else {
  +         if (_text.trim().length() == 0) _ignore = true;
  +     }
  +    }
  +
  +    public void ignore() {
  +     _ignore = true;
  +    }
  +
  +    public boolean isTextElement() {
  +     return _textElement;
       }
   
       protected boolean contextDependent() {
  @@ -131,7 +158,7 @@
        final ConstantPoolGen cpg = classGen.getConstantPool();
        final InstructionList il = methodGen.getInstructionList();
   
  -     if (_text != null && _text.length() > 0) {
  +     if (!_ignore) {
            // Turn off character escaping if so is wanted.
            final int esc = cpg.addInterfaceMethodref(OUTPUT_HANDLER,
                                                      "setEscaping", "(Z)Z");
  
  
  
  1.44      +127 -56   xml-xalan/java/src/org/apache/xalan/xsltc/runtime/TextOutput.java
  
  Index: TextOutput.java
  ===================================================================
  RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/runtime/TextOutput.java,v
  retrieving revision 1.43
  retrieving revision 1.44
  diff -u -r1.43 -r1.44
  --- TextOutput.java   2001/11/21 10:54:01     1.43
  +++ TextOutput.java   2001/11/22 13:08:23     1.44
  @@ -1,5 +1,5 @@
   /*
  - * @(#)$Id: TextOutput.java,v 1.43 2001/11/21 10:54:01 morten Exp $
  + * @(#)$Id: TextOutput.java,v 1.44 2001/11/22 13:08:23 morten Exp $
    *
    * The Apache Software License, Version 1.1
    *
  @@ -124,7 +124,12 @@
       private static final int BEGCOMM_length = BEGCOMM.length;
       private static final int ENDCOMM_length = ENDCOMM.length;
   
  -    private static final String EMPTYSTRING = "";
  +    private static final String EMPTYSTRING     = "";
  +    private static final String HREF_STR        = "href";
  +    private static final String SRC_STR         = "str";
  +    private static final String CHAR_ESC_START  = "&#";
  +    private static final String CDATA_ESC_START = "]]>&#";
  +    private static final String CDATA_ESC_END   = ";<![CDATA[";
   
       private AttributeList _attributes = new AttributeList();
       private String        _elementName = null;
  @@ -361,54 +366,6 @@
       }
   
       /**
  -     * Utility method - escape special characters and pass to SAX handler
  -     */
  -    private void escapeCharacters(char[] ch, int off, int len)
  -     throws SAXException {
  -
  -     int limit = off + len;
  -     int offset = off;
  -
  -     if (limit > ch.length) limit = ch.length;;
  -
  -     // Step through characters and escape all special characters
  -     for (int i = off; i < limit; i++) {
  -         switch (ch[i]) {
  -         case '&':
  -             _saxHandler.characters(ch, offset, i - offset);
  -             _saxHandler.characters(AMP, 0, AMP_length);
  -             offset = i + 1;
  -             break;
  -             /* Quotes should only be escaped inside attribute values
  -         case '"':
  -             _saxHandler.characters(ch, offset, i - offset);
  -             _saxHandler.characters(QUOTE, 0, QUOTE_length);
  -             offset = i + 1;
  -             break;
  -             */
  -         case '<':
  -             _saxHandler.characters(ch, offset, i - offset);
  -             _saxHandler.characters(LT, 0, LT_length);
  -             offset = i + 1;
  -             break;
  -         case '>':
  -             _saxHandler.characters(ch, offset, i - offset);
  -             _saxHandler.characters(GT, 0, GT_length);
  -             offset = i + 1;
  -             break;
  -         case '\u00a0':
  -             _saxHandler.characters(ch, offset, i - offset);
  -             _saxHandler.characters(NBSP, 0, NBSP_length);
  -             offset = i + 1;
  -             break;
  -         }
  -         // TODO - more characters need escaping!!!
  -     }
  -     // Output remaining characters (that do not need escaping).
  -     if (offset < limit) _saxHandler.characters(ch, offset, limit - offset);
  -    }
  -
  -    /**
        * Utility method - pass a whole charactes as CDATA to SAX handler
        */
       private void startCDATA(char[] ch, int off, int len) throws SAXException {
  @@ -464,7 +421,10 @@
                    startCDATA(ch, off, len);
                // Output characters escaped if required.
                else if (_escapeChars)
  -                 escapeCharacters(ch, off, len);
  +                 if (_cdataTagOpen)
  +                     escapeCDATA(ch, off, len);
  +                 else
  +                     escapeCharacters(ch, off, len);
                // Output the chracters as the are if not.
                else
                    _saxHandler.characters(ch, off, len);
  @@ -582,9 +542,92 @@
       }
   
       /**
  +     * Utility method - escape special characters and pass to SAX handler
  +     */
  +    private void escapeCharacters(char[] ch, int off, int len)
  +     throws SAXException {
  +
  +     int limit = off + len;
  +     int offset = off;
  +
  +     if (limit > ch.length) limit = ch.length;;
  +
  +     // Step through characters and escape all special characters
  +     for (int i = off; i < limit; i++) {
  +         switch (ch[i]) {
  +         case '&':
  +             _saxHandler.characters(ch, offset, i - offset);
  +             _saxHandler.characters(AMP, 0, AMP_length);
  +             offset = i + 1;
  +             break;
  +         case '<':
  +             _saxHandler.characters(ch, offset, i - offset);
  +             _saxHandler.characters(LT, 0, LT_length);
  +             offset = i + 1;
  +             break;
  +         case '>':
  +             _saxHandler.characters(ch, offset, i - offset);
  +             _saxHandler.characters(GT, 0, GT_length);
  +             offset = i + 1;
  +             break;
  +         case '\u00a0':
  +             _saxHandler.characters(ch, offset, i - offset);
  +             _saxHandler.characters(NBSP, 0, NBSP_length);
  +             offset = i + 1;
  +             break;
  +         default:
  +             // Escape all characters not in the basic ASCII character set
  +             // to simple (hexadecimal) character references
  +             if (ch[i] > '\u00ff') {
  +                 StringBuffer buf = new StringBuffer(CHAR_ESC_START);
  +                 buf.append(Integer.toString((int)ch[i]));
  +                 buf.append(';');
  +                 final String esc = buf.toString();
  +                 final char[] chars = esc.toCharArray();
  +                 final int    strlen = esc.length();
  +                 _saxHandler.characters(ch, offset, i - offset);
  +                 _saxHandler.characters(chars, 0, strlen);
  +                 offset = i + 1;
  +             }
  +         }
  +     }
  +     // Output remaining characters (that do not need escaping).
  +     if (offset < limit) _saxHandler.characters(ch, offset, limit - offset);
  +    }
  +
  +    /**
  +     * Utility method - escape special characters and pass to SAX handler
  +     */
  +    private void escapeCDATA(char[] ch, int off, int len)
  +     throws SAXException {
  +
  +     int limit = off + len;
  +     int offset = off;
  +
  +     if (limit > ch.length) limit = ch.length;;
  +
  +     // Step through characters and escape all special characters
  +     for (int i = off; i < limit; i++) {
  +         if (ch[i] > '\u00ff') {
  +             StringBuffer buf = new StringBuffer(CDATA_ESC_START);
  +             buf.append(Integer.toString((int)ch[i]));
  +             buf.append(CDATA_ESC_END);
  +             final String esc = buf.toString();
  +             final char[] chars = esc.toCharArray();
  +             final int    strlen = esc.length();
  +             _saxHandler.characters(ch, offset, i - offset);
  +             _saxHandler.characters(chars, 0, strlen);
  +             offset = i + 1;
  +         }
  +     }
  +     // Output remaining characters (that do not need escaping).
  +     if (offset < limit) _saxHandler.characters(ch, offset, limit - offset);
  +    }
  +
  +    /**
        * This method escapes special characters used in attribute values
        */
  -    private String escapeChars(String value) {
  +    private String escapeString(String value) {
   
        int i;
        char[] ch = value.toCharArray();
  @@ -628,6 +671,33 @@
       }
   
       /**
  +     * This method escapes special characters used in HTML attribute values
  +     */
  +    private String escapeAttr(String base) {
  +
  +     final int len = base.length() - 1;
  +     final String str = "&quot;";
  +     int pos;
  +
  +     while ((pos = base.indexOf('"')) > -1) {
  +         if (pos == 0) {
  +             final String after = base.substring(1);
  +             base = str + after;
  +         }
  +         else if (pos == len) {
  +             final String before = base.substring(0, pos);
  +             base = before + str;
  +         }
  +         else {
  +             final String before = base.substring(0, pos);
  +             final String after = base.substring(pos+1);
  +             base = before + str + after;
  +         }
  +     }
  +     return base;
  +    }
  +
  +    /**
        * Replaces whitespaces in a URL with '%20'
        */
       private String quickAndDirtyUrlEncode(String base) {
  @@ -707,7 +777,7 @@
            }
            else {
                // Output as regular attribute
  -             _attributes.add(expandAttribute(name), escapeChars(value));
  +             _attributes.add(expandAttribute(name), escapeString(value));
            }
            return;
        case HTML:
  @@ -722,10 +792,11 @@
            // we do not change the meaning of the URL.
   
            // URL-encode href attributes in HTML output
  -         if  (name.toLowerCase().equals("href"))
  -             _attributes.add(name,quickAndDirtyUrlEncode(escapeChars(value)));
  +         final String tmp = name.toLowerCase();
  +         if  (tmp.equals(HREF_STR) || tmp.equals(SRC_STR))
  +             _attributes.add(name,quickAndDirtyUrlEncode(escapeAttr(value)));
            else
  -
  +             _attributes.add(expandAttribute(name), escapeAttr(value));
            return;
        }
       }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to