elena       2003/11/18 14:59:50

  Modified:    java/src/org/apache/xml/serialize BaseMarkupSerializer.java
                        XML11Serializer.java XMLSerializer.java
  Log:
  We need to escape CR during text normalization, and TAB, CR, and NL during
  attribute value serialization, to allow round-tripping [1], [2]. Similar
  escaping is done for XML 1.1 serialization.
  
  Also added a check for "unbound-prefix-in-entity-reference" [2]. Many thanks
  to Neil Delima, who helped provide this fix.
  [1] http://www.w3.org/TR/xslt-xquery-serialization/#xml-output
  [2] http://www.w3.org/TR/2003/CR-DOM-Level-3-LS-20031107/
  
  Revision  Changes    Path
  1.48      +26 -31    xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java
  
  Index: BaseMarkupSerializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java,v
  retrieving revision 1.47
  retrieving revision 1.48
  diff -u -r1.47 -r1.48
  --- BaseMarkupSerializer.java 17 Nov 2003 13:47:36 -0000      1.47
  +++ BaseMarkupSerializer.java 18 Nov 2003 22:59:50 -0000      1.48
  @@ -1048,13 +1048,8 @@
                       switch (code) {
                           case NodeFilter.FILTER_REJECT:
                           case NodeFilter.FILTER_SKIP: { 
  -                            // REVISIT: the constant FILTER_SKIP should be changed 
when new
  -                            // DOM LS specs gets published
  -
  -                            // skip the text node
                               break;
                           }
  -
                           default: {
                               characters(text);
                           }
  @@ -1079,9 +1074,6 @@
                       switch (code) {
                           case NodeFilter.FILTER_REJECT:
                           case NodeFilter.FILTER_SKIP: { 
  -                            // REVISIT: the constant FILTER_SKIP should be changed 
when new
  -                            // DOM LS specs gets published
  -
                               // skip the CDATA node
                               return;
                           }
  @@ -1111,9 +1103,6 @@
                             switch (code) {
                                 case NodeFilter.FILTER_REJECT:
                                 case NodeFilter.FILTER_SKIP: { 
  -                                  // REVISIT: the constant FILTER_SKIP should be 
changed when new
  -                                  // DOM LS specs gets published
  -
                                     // skip the comment node
                                     return;
                                 }
  @@ -1142,9 +1131,6 @@
                           return; // remove the node
                         }
                         case NodeFilter.FILTER_SKIP: { 
  -                          // REVISIT: the constant FILTER_SKIP should be changed 
when new
  -                          // DOM LS specs gets published
  -
                             child = node.getFirstChild();
                             while ( child != null ) {
                                 serializeNode( child );
  @@ -1154,19 +1140,13 @@
                         }
   
                         default: {
  -                          child = node.getFirstChild();
  -                          if (child !=null) {
  -                              _printer.printText("&");
  -                              _printer.printText(node.getNodeName());
  -                              _printer.printText(";");
  -                          }
  -                          return;
  +                           // fall through
                         }
                     }
  -              }
  -
  +              }                      
               child = node.getFirstChild();
               if ( child == null || (fFeatures !=null && 
getFeature(Constants.DOM_ENTITIES))){
  +                             checkUnboundNamespacePrefixedNode(node);
                   _printer.printText("&");
                   _printer.printText(node.getNodeName());
                   _printer.printText(";");
  @@ -1540,9 +1520,7 @@
                           _printer.printText(";<![CDATA[");
                       }  
                       else {
  -                        _printer.printText("&#x");                        
  -                        _printer.printText(Integer.toHexString(supplemental));      
                  
  -                        _printer.printText(";");
  +                        printHex(supplemental);
                       }
                   }
               }
  @@ -1690,12 +1668,19 @@
                   _printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
               }
           } else {
  -            // The character is not printable, print as character reference.
  -            _printer.printText( "&#x" );
  -            _printer.printText(Integer.toHexString(ch));
  -            _printer.printText( ';' );
  +                     printHex(ch);
           }
       }
  +    
  +     /**
  +      * Escapes chars
  +      */ 
  +      final void printHex( int ch) throws IOException {      
  +              _printer.printText( "&#x" );
  +              _printer.printText(Integer.toHexString(ch));
  +              _printer.printText( ';' );
  +             
  +      }
   
   
       /**
  @@ -1878,4 +1863,14 @@
               throw new IOException(message);
           }
       }
  +    
  +     /**
  +      * DOM level 3: 
  +      * Check a node to determine if it contains unbound namespace prefixes.
  +      *
  +      * @param node The node to check for unbound namespace prefices
  +      */
  +      protected void checkUnboundNamespacePrefixedNode (Node node) throws 
IOException{
  +             
  +      }
   }
  
  
  
  1.7       +29 -36    xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java
  
  Index: XML11Serializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- XML11Serializer.java      17 Oct 2003 17:41:41 -0000      1.6
  +++ XML11Serializer.java      18 Nov 2003 22:59:50 -0000      1.7
  @@ -311,7 +311,19 @@
                   }
                   continue;
               }
  -            printXMLChar(ch, false);
  +            if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch == 
0x2028){
  +                             printHex(ch);
  +                     } else if (ch == '<') {
  +                             _printer.printText("&lt;");
  +                     } else if (ch == '&') {
  +                             _printer.printText("&amp;");
  +                     } else if (ch == '"') {
  +                             _printer.printText("&quot;");
  +                     } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) 
ch))) {
  +                             _printer.printText((char) ch);
  +                     } else {
  +                             printHex(ch);
  +                     }
           }
       }
   
  @@ -376,9 +388,11 @@
   
       // note that this "int" should, in all cases, be a char.
       // REVISIT:  make it a char...
  -    protected final void printXMLChar( int ch, boolean keepQuot ) throws 
IOException {
  -
  -        if ( ch == '<') {
  +    protected final void printXMLChar( int ch ) throws IOException {
  +     
  +     if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
  +                     printHex(ch);
  +     } else if ( ch == '<') {
               _printer.printText("&lt;");
           } else if (ch == '&') {
               _printer.printText("&amp;");
  @@ -386,15 +400,10 @@
                        // character sequence "]]>" can't appear in content, therefore
                        // we should escape '>' 
                        _printer.printText("&gt;");
  -        } else if ( ch == '"' && !keepQuot) {
  -            _printer.printText("&quot;");
           } else if ( _encodingInfo.isPrintable((char)ch) && 
XML11Char.isXML11ValidLiteral(ch)) { 
               _printer.printText((char)ch);
           } else {
  -            // The character is not printable, print as character reference.
  -            _printer.printText( "&#x" );
  -            _printer.printText(Integer.toHexString(ch));
  -            _printer.printText( ';' );
  +             printHex(ch);
           }
       }
   
  @@ -419,9 +428,7 @@
                           _printer.printText(";<![CDATA[");
                       }  
                       else {
  -                        _printer.printText("&#x");                        
  -                        _printer.printText(Integer.toHexString(supplemental));      
                  
  -                        _printer.printText(";");
  +                                             printHex(supplemental);
                       }
                   }
               }
  @@ -456,7 +463,7 @@
                   if ( unescaped  && XML11Char.isXML11ValidLiteral(ch)) {
                       _printer.printText( ch );
                   } else
  -                    printXMLChar( ch, true );
  +                    printXMLChar( ch );
               }
           } else {
               // Not preserving spaces: print one part at a time, and
  @@ -475,18 +482,11 @@
                       }
                       continue;
                   }
  -                // Nonterminal S is unchanged in XML 1.1, so NEL (0x85) 
  -                // and LSEP (0x2028) are not space characters.
  -                //
  -                // REVISIT: NEL and LSEP need to be escaped in order for
  -                // them to be roundtripped, otherwise they will be
  -                // normalized to LF when the document is read back. - mrglavas
  -                if ( XMLChar.isSpace(ch))
  -                    _printer.printSpace();
  -                else if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
  +
  +                if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
                       _printer.printText( ch );
                   else
  -                    printXMLChar( ch, true);
  +                    printXMLChar( ch);
               }
           }
       }
  @@ -518,7 +518,7 @@
                   if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
                       _printer.printText( ch );
                   else
  -                    printXMLChar( ch, true );
  +                    printXMLChar( ch );
               }
           } else {
               // Not preserving spaces: print one part at a time, and
  @@ -539,18 +539,11 @@
                       }
                       continue;
                   }
  -                // Nonterminal S is unchanged in XML 1.1, so NEL (0x85) 
  -                // and LSEP (0x2028) are not space characters.
  -                //
  -                // REVISIT: NEL and LSEP need to be escaped in order for
  -                // them to be roundtripped, otherwise they will be
  -                // normalized to LF when the document is read back. - mrglavas
  -                if (XMLChar.isSpace(ch))
  -                    _printer.printSpace();
  -                else if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
  +              
  +                if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
                       _printer.printText( ch );
                   else
  -                    printXMLChar( ch, true );
  +                    printXMLChar( ch );
               }
           }
       }
  
  
  
  1.55      +104 -32   xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
  
  Index: XMLSerializer.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java,v
  retrieving revision 1.54
  retrieving revision 1.55
  diff -u -r1.54 -r1.55
  --- XMLSerializer.java        23 Sep 2003 21:42:31 -0000      1.54
  +++ XMLSerializer.java        18 Nov 2003 22:59:50 -0000      1.55
  @@ -1235,30 +1235,42 @@
       }
   
       //
  -    // overwrite printing functions to make sure serializer prints out valid XML
  +    // Printing attribute value
       //
  -    protected void printEscaped( String source ) throws IOException {
  +    protected void printEscaped(String source) throws IOException {
           int length = source.length();
  -        for ( int i = 0 ; i < length ; ++i ) {
  +        for (int i = 0; i < length; ++i) {
               int ch = source.charAt(i);
               if (!XMLChar.isValid(ch)) {
  -                if (++i <length) {
  +                if (++i < length) {
                       surrogates(ch, source.charAt(i));
                   } else {
  -                    fatalError("The character '"+(char)ch+"' is an invalid XML 
character"); 
  +                    fatalError("The character '" + (char) ch + "' is an invalid XML 
character");
                   }
                   continue;
               }
  -            printXMLChar(ch, false);
  +            // escape NL, CR, TAB
  +            if (ch == '\n' || ch == '\r' || ch == '\t') {
  +                printHex(ch);
  +            } else if (ch == '<') {
  +                _printer.printText("&lt;");
  +            } else if (ch == '&') {
  +                _printer.printText("&amp;");
  +            } else if (ch == '"') {
  +                _printer.printText("&quot;");
  +            } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
  +                _printer.printText((char) ch);
  +            } else {
  +                printHex(ch);
  +            }
           }
       }
   
  -    // note that this "int" should, in all cases, be a char.
  -    // if printing out attribute values, the double quotes mark is converted to
  -    // quot, otherwise keep quotations as is.
  -    protected void printXMLChar( int ch, boolean keepQuot) throws IOException {
  -        
  -        if ( ch == '<') {
  +    /** print text data */
  +    protected void printXMLChar( int ch) throws IOException {
  +     if (ch == '\r') {
  +                     printHex(ch);
  +     } else if ( ch == '<') {
               _printer.printText("&lt;");
           } else if (ch == '&') {
               _printer.printText("&amp;");
  @@ -1266,17 +1278,11 @@
                // character sequence "]]>" can't appear in content, therefore
                // we should escape '>' 
                        _printer.printText("&gt;");             
  -        } else if ( ch == '"' && ! keepQuot) {
  -            _printer.printText("&quot;");
  -        } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch)) ||
  -                    ch == '\n' || ch == '\r' || ch == '\t' ) {
  -                     // REVISIT: new line characters must be escaped
  +        } else if ( ch == '\n' ||  ch == '\t' ||
  +                    ( ch >= ' ' && _encodingInfo.isPrintable((char)ch))) {
               _printer.printText((char)ch);
           } else {
  -            // The character is not printable, print as character reference.
  -            _printer.printText( "&#x" );
  -            _printer.printText(Integer.toHexString(ch));
  -            _printer.printText( ';' );
  +                     printHex(ch);
           }
       }
   
  @@ -1304,7 +1310,7 @@
                   if ( unescaped ) {
                       _printer.printText( ch );
                   } else
  -                    printXMLChar( ch, true );
  +                    printXMLChar( ch );
               }
           } else {
               // Not preserving spaces: print one part at a time, and
  @@ -1323,12 +1329,11 @@
                       }
                       continue;
                   }
  -                if ( XMLChar.isSpace(ch))
  -                    _printer.printSpace();
  -                else if ( unescaped )
  +
  +                             if ( unescaped )
                       _printer.printText( ch );
                   else
  -                    printXMLChar( ch, true);
  +                    printXMLChar( ch);
               }
           }
       }
  @@ -1360,7 +1365,7 @@
                   if ( unescaped )
                       _printer.printText( ch );
                   else
  -                    printXMLChar( ch, true );
  +                    printXMLChar( ch );
               }
           } else {
               // Not preserving spaces: print one part at a time, and
  @@ -1381,16 +1386,83 @@
                       }
                       continue;
                   }
  -                if ( XMLChar.isSpace(ch))
  -                    _printer.printSpace();
  -                else if ( unescaped )
  +                if ( unescaped )
                       _printer.printText( ch );
                   else
  -                    printXMLChar( ch, true );
  +                    printXMLChar( ch );
               }
           }
       }
   
  +
  +   /**
  +    * DOM Level 3:
  +    * Check a node to determine if it contains unbound namespace prefixes.
  +    *
  +    * @param node The node to check for unbound namespace prefices
  +    */
  +     protected void checkUnboundNamespacePrefixedNode (Node node) throws 
IOException{
  +
  +             if (fNamespaces) {
  +
  +                     if (DEBUG) {
  +                         
System.out.println("==>serializeNode("+node.getNodeName()+") [Entity Reference - 
Namespaces on]");
  +                             System.out.println("==>Declared Prefix Count: " + 
fNSBinder.getDeclaredPrefixCount());
  +                             System.out.println("==>Node Name: " + 
node.getNodeName());
  +                             System.out.println("==>First Child Node Name: " + 
node.getFirstChild().getNodeName());
  +                             System.out.println("==>First Child Node Prefix: " + 
node.getFirstChild().getPrefix());
  +                             System.out.println("==>First Child Node NamespaceURI: 
" + node.getFirstChild().getNamespaceURI());                      
  +                     }
  +
  +             
  +                     Node child, next;
  +             for (child = node.getFirstChild(); child != null; child = next) {
  +                 next = child.getNextSibling();
  +                         if (DEBUG) {
  +                             
System.out.println("==>serializeNode("+child.getNodeName()+") [Child Node]");
  +                             
System.out.println("==>serializeNode("+child.getPrefix()+") [Child Node Prefix]");
  +                 }    
  +     
  +                         //If a NamespaceURI is not declared for the current
  +                         //node's prefix, raise a fatal error.
  +                         String prefix = child.getPrefix();
  +                         if (fNSBinder.getURI(prefix) == null && prefix != null) {
  +                                     fatalError("The replacement text of the entity 
node '" 
  +                                                             + node.getNodeName()  
  +                                                             + "' contains an 
element node '" 
  +                                                             + child.getNodeName() 
  +                                                             + "' with an 
undeclared prefix '" 
  +                                                             + prefix + "'.");
  +                         }   
  +
  +                             if (child.getNodeType() == Node.ELEMENT_NODE) {
  +                                     
  +                                     NamedNodeMap attrs = child.getAttributes();
  +                                     
  +                                     for (int i = 0; i< attrs.getLength(); i++ ) {
  +                                             
  +                                         String attrPrefix = 
attrs.item(i).getPrefix();
  +                                         if (fNSBinder.getURI(attrPrefix) == null 
&& attrPrefix != null) {
  +                                                     fatalError("The replacement 
text of the entity node '" 
  +                                                                             + 
node.getNodeName()  
  +                                                                             + "' 
contains an element node '" 
  +                                                                             + 
child.getNodeName() 
  +                                                                             + "' 
with an attribute '" 
  +                                                                             + 
attrs.item(i).getNodeName()                                                            
               
  +                                                                             + "' 
an undeclared prefix '" 
  +                                                                             + 
attrPrefix + "'.");
  +                                         }   
  +                                             
  +                                     }       
  +
  +                             }
  +                                     
  +                             if (child.hasChildNodes()) {
  +                                     checkUnboundNamespacePrefixedNode(child);
  +                             }       
  +             }
  +             }    
  +     }       
   
       public boolean reset() {
           super.reset();
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to