mrglavas    2004/07/18 12:57:42

  Modified:    java/src/org/apache/xerces/xinclude XIncludeHandler.java
                        XIncludeTextReader.java XInclude11TextReader.java
  Log:
  JIRA Issue #992:

  http://nagoya.apache.org/jira/browse/XERCESJ-992

  

  The parse method of XIncludeTextReader was reading from the input
  stream one character at a time, accumulating all the characters in
  a buffer before reporting them down the pipeline. This is a space
  hog for large text includes; it is more efficient to read and
  report the text in multiple chunks.

  

  Applying the patch from Ankit Pasricha with some modifications and
  additions. The reader now reads chunks from the stream and makes
  multiple callbacks to characters() instead of accumulating all the
  text in the buffer. XIncludeTextReader has been modified so that it
  is reusable, and XIncludeHandler now reuses its XIncludeTextReader
  instances. The input-buffer-size property is also used to determine
  the size of the internal buffer used for processing text includes.
  
  Revision  Changes    Path
  1.27      +88 -14    
xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeHandler.java
  
  Index: XIncludeHandler.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeHandler.java,v
  retrieving revision 1.26
  retrieving revision 1.27
  diff -u -r1.26 -r1.27
  --- XIncludeHandler.java      15 Apr 2004 04:51:56 -0000      1.26
  +++ XIncludeHandler.java      18 Jul 2004 19:57:42 -0000      1.27
  @@ -84,6 +84,11 @@
    *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
    *  <li>http://apache.org/xml/properties/internal/entity-resolver</li>
    * </ul>
  + * Optional property:
  + * <ul>
  + *  <li>http://apache.org/xml/properties/input-buffer-size</li>
  + * </ul>
  + * 
    * Furthermore, the <code>NamespaceContext</code> used in the pipeline is required
    * to be an instance of <code>XIncludeNamespaceSupport</code>.
    * </p>
  @@ -171,6 +176,10 @@
       /** property identifier: security manager. */
       protected static final String SECURITY_MANAGER =
           Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY;
  +    
  +    /** property identifier: buffer size. */
  +    public static final String BUFFER_SIZE =
  +        Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY;
   
       /** Recognized features. */
       private static final String[] RECOGNIZED_FEATURES =
  @@ -181,10 +190,10 @@
   
       /** Recognized properties. */
       private static final String[] RECOGNIZED_PROPERTIES =
  -        { ERROR_REPORTER, ENTITY_RESOLVER, SECURITY_MANAGER };
  +        { ERROR_REPORTER, ENTITY_RESOLVER, SECURITY_MANAGER, BUFFER_SIZE };
   
       /** Property defaults. */
  -    private static final Object[] PROPERTY_DEFAULTS = { null, null, null };
  +    private static final Object[] PROPERTY_DEFAULTS = { null, null, null, new 
Integer(XMLEntityManager.DEFAULT_BUFFER_SIZE) };
   
       // instance variables
   
  @@ -198,8 +207,11 @@
   
       // for XIncludeHandler
       protected XIncludeHandler fParentXIncludeHandler;
  +    
  +    // for buffer size in XIncludeTextReader
  +    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;
   
  -    // It's "feels wrong" to store this value here.  However,
  +    // It "feels wrong" to store this value here.  However,
       // calculating it can be time consuming, so we cache it.
       // It's never going to change in the lifetime of this XIncludeHandler
       protected String fParentRelativeURI;
  @@ -213,6 +225,10 @@
       protected XMLErrorReporter fErrorReporter;
       protected XMLEntityResolver fEntityResolver;
       protected SecurityManager fSecurityManager;
  +    
  +    // these are needed for text include processing
  +    protected XIncludeTextReader fXInclude10TextReader;
  +    protected XIncludeTextReader fXInclude11TextReader;
   
       // these are needed for XML Base processing
       protected XMLResourceIdentifier fCurrentBaseURI;
  @@ -370,6 +386,32 @@
           catch (XMLConfigurationException e) {
               fSecurityManager = null;
           }
  +        
  +        // Get buffer size.
  +        try {
  +            Integer value =
  +                (Integer)componentManager.getProperty(
  +                    BUFFER_SIZE);
  +
  +            if (value != null && value.intValue() > 0) {
  +                fBufferSize = value.intValue();
  +            }
  +            else {
  +             fBufferSize = ((Integer)getPropertyDefault(BUFFER_SIZE)).intValue();
  +            }
  +        }
  +        catch (XMLConfigurationException e) {
  +             fBufferSize = ((Integer)getPropertyDefault(BUFFER_SIZE)).intValue();
  +        }
  +        
  +        // Reset XML 1.0 text reader.
  +        if (fXInclude10TextReader != null) {
  +             fXInclude10TextReader.setBufferSize(fBufferSize);
  +        }
  +        // Reset XML 1.1 text reader.
  +        if (fXInclude11TextReader != null) {
  +            fXInclude11TextReader.setBufferSize(fBufferSize);   
  +        }
   
           fSettings = new ParserConfigurationSettings();
           copyFeatures(componentManager, fSettings);
  @@ -442,18 +484,36 @@
               if (fChildConfig != null) {
                   fChildConfig.setProperty(propertyId, value);
               }
  +            return;
           }
           if (propertyId.equals(ENTITY_RESOLVER)) {
               fEntityResolver = (XMLEntityResolver)value;
               if (fChildConfig != null) {
                   fChildConfig.setProperty(propertyId, value);
               }
  +            return;
           }
           if (propertyId.equals(SECURITY_MANAGER)) {
               fSecurityManager = (SecurityManager)value;
               if (fChildConfig != null) {
                   fChildConfig.setProperty(propertyId, value);
               }
  +            return;
  +        }
  +        if (propertyId.equals(BUFFER_SIZE)) {
  +            Integer bufferSize = (Integer) value;
  +            if (bufferSize != null && bufferSize.intValue() > 0) {
  +                fBufferSize = bufferSize.intValue();
  +                // Reset XML 1.0 text reader.
  +                if (fXInclude10TextReader != null) {
  +                    fXInclude10TextReader.setBufferSize(fBufferSize);
  +                }
  +                // Reset XML 1.1 text reader.
  +                if (fXInclude11TextReader != null) {
  +                    fXInclude11TextReader.setBufferSize(fBufferSize);
  +                }
  +            }
  +            return;
           }
   
       } // setProperty(String,Object)
  @@ -1253,21 +1313,34 @@
               // we only care about encoding for parse="text"
               String encoding = attributes.getValue(XINCLUDE_ATTR_ENCODING);
               includedSource.setEncoding(encoding);
  -
  -            XIncludeTextReader reader = null;
  +            XIncludeTextReader textReader = null;
  +            
               try {
  -                if (fIsXML11) {
  -                    reader = new XInclude11TextReader(includedSource, this);
  +                // Setup the appropriate text reader.
  +                if (!fIsXML11) {
  +                    if (fXInclude10TextReader == null) {
  +                        fXInclude10TextReader = new 
XIncludeTextReader(includedSource, this, fBufferSize);
  +                    }
  +                    else {
  +                        fXInclude10TextReader.setInputSource(includedSource);
  +                    }
  +                    textReader = fXInclude10TextReader;
                   }
                   else {
  -                    reader = new XIncludeTextReader(includedSource, this);
  +                    if (fXInclude11TextReader == null) {
  +                        fXInclude11TextReader = new 
XInclude11TextReader(includedSource, this, fBufferSize);
  +                    }
  +                    else {
  +                        fXInclude11TextReader.setInputSource(includedSource);
  +                    }
  +                    textReader = fXInclude11TextReader;
                   }
                   if (includedSource.getCharacterStream() == null
                       && includedSource.getByteStream() == null) {
  -                    reader.setHttpProperties(accept, acceptLanguage);
  +                     textReader.setHttpProperties(accept, acceptLanguage);
                   }
  -                reader.setErrorReporter(fErrorReporter);
  -                reader.parse();
  +                textReader.setErrorReporter(fErrorReporter);
  +                textReader.parse();
               }
               // encoding errors
               catch (MalformedByteSequenceException ex) {
  @@ -1285,9 +1358,9 @@
                   return false;
               }
               finally {
  -                if (reader != null) {
  +                if (textReader != null) {
                       try {
  -                        reader.close();
  +                        textReader.close();
                       }
                       catch (IOException e) {
                           reportResourceError(
  @@ -2192,4 +2265,5 @@
           }
           return true;
       }
  +    
   }
  
  
  
  1.11      +83 -50    
xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeTextReader.java
  
  Index: XIncludeTextReader.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/xinclude/XIncludeTextReader.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- XIncludeTextReader.java   15 Apr 2004 04:51:56 -0000      1.10
  +++ XIncludeTextReader.java   18 Jul 2004 19:57:42 -0000      1.11
  @@ -25,15 +25,15 @@
   import java.net.URLConnection;
   import java.util.Locale;
   
  +import org.apache.xerces.impl.XMLEntityManager;
  +import org.apache.xerces.impl.XMLErrorReporter;
   import org.apache.xerces.impl.io.ASCIIReader;
   import org.apache.xerces.impl.io.UTF8Reader;
   import org.apache.xerces.impl.msg.XMLMessageFormatter;
  -import org.apache.xerces.impl.XMLEntityManager;
  -import org.apache.xerces.impl.XMLErrorReporter;
   import org.apache.xerces.util.EncodingMap;
   import org.apache.xerces.util.MessageFormatter;
   import org.apache.xerces.util.XMLChar;
  -import org.apache.xerces.util.XMLStringBuffer;
  +import org.apache.xerces.xni.XMLString;
   import org.apache.xerces.xni.parser.XMLInputSource;
   
   /**
  @@ -50,6 +50,7 @@
    * 
    * @author Michael Glavassevich, IBM
    * @author Peter McCracken, IBM
  + * @author Ankit Pasricha, IBM
    * @author Arun Yadav, Sun Microsystems Inc.
    *
    * @version $Id$
  @@ -62,6 +63,7 @@
       private XIncludeHandler fHandler;
       private XMLInputSource fSource;
       private XMLErrorReporter fErrorReporter;
  +    private XMLString fTempString = new XMLString();
       
       // Content negotation parameters
       private String fAccept;
  @@ -72,11 +74,13 @@
        *
        * @param source The XMLInputSource to use.
        * @param handler The XIncludeHandler to use.
  +     * @param bufferSize The size of this text reader's buffer.
        */
  -    public XIncludeTextReader(XMLInputSource source, XIncludeHandler handler)
  +    public XIncludeTextReader(XMLInputSource source, XIncludeHandler handler, int 
bufferSize)
           throws IOException {
           fHandler = handler;
           fSource = source;
  +        fTempString = new XMLString(new char[bufferSize + 1], 0, 0);
       }
       
       /**
  @@ -121,7 +125,7 @@
                   stream = source.getByteStream();
                   // Wrap the InputStream so that it is possible to rewind it.
                   if (!(stream instanceof BufferedInputStream)) {
  -                    stream = new BufferedInputStream(stream);
  +                    stream = new BufferedInputStream(stream, fTempString.ch.length);
                   }
               }
               else {
  @@ -227,7 +231,7 @@
               // this encoding has many aliases.
               if (encoding.equals("UTF-8")) {
                   return new UTF8Reader(stream, 
  -                    XMLEntityManager.DEFAULT_BUFFER_SIZE, 
  +                    fTempString.ch.length, 
                       
fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), 
                       fErrorReporter.getLocale() );
               }
  @@ -248,7 +252,7 @@
               }
               else if (javaEncoding.equals("ASCII")) {
                   return new ASCIIReader(stream,
  -                    XMLEntityManager.DEFAULT_BUFFER_SIZE,
  +                    fTempString.ch.length,
                       
fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), 
                       fErrorReporter.getLocale() );
               }
  @@ -405,56 +409,71 @@
        * @throws IOException
        */
       public void parse() throws IOException {
  -        // REVISIT: This method needs to be rewritten to improve performance: both
  -        // time and memory. We should be reading chunks and reporting chunks 
instead 
  -        // of reading characters individually and reporting all the characters in 
  -        // one callback. Also, currently we don't provide any locator information:
  -        // line number, column number, etc... so if we report an error it will 
appear
  -        // as if the invalid XML character was in the include parent. -- mrglavas
  -        XMLStringBuffer buffer = new XMLStringBuffer();
  +        
           fReader = getReader(fSource);
  -        int ch;
  -        while((ch = fReader.read()) != -1) {
  -            if (isValid(ch)) {
  -                buffer.append((char)ch);
  -            }
  -            else if (XMLChar.isHighSurrogate(ch)) {
  -             int ch2 = fReader.read();
  -             if (XMLChar.isLowSurrogate(ch2)) {
  -
  -                    // convert surrogates to a supplemental character
  -                    int sup = XMLChar.supplemental((char)ch, (char)ch2);
  -
  -                    // supplemental character must be a valid XML character
  -                    if (!isValid(sup)) {
  +        fSource = null;
  +        int readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1);
  +        while (readSize != -1) {
  +            for (int i = 0; i < readSize; ++i) {
  +                char ch = fTempString.ch[i];
  +                if (!isValid(ch)) {
  +                    if (XMLChar.isHighSurrogate(ch)) {
  +                        int ch2;
  +                        // retrieve next character
  +                        if (++i < readSize) {
  +                            ch2 = fTempString.ch[i];
  +                        }
  +                        // handle rare boundary case
  +                        else {
  +                            ch2 = fReader.read();
  +                            if (ch2 != -1) {
  +                                fTempString.ch[readSize++] = (char) ch2;
  +                            }
  +                        }
  +                        if (XMLChar.isLowSurrogate(ch2)) {
  +                            // convert surrogates to a supplemental character
  +                            int sup = XMLChar.supplemental(ch, (char)ch2);
  +                            if (!isValid(sup)) {
  +                                
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  +                                                           "InvalidCharInContent", 
  +                                                           new Object[] { 
Integer.toString(sup, 16) },
  +                                                           
XMLErrorReporter.SEVERITY_FATAL_ERROR);
  +                            }
  +                        }
  +                        else {
  +                            
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  +                                                       "InvalidCharInContent", 
  +                                                       new Object[] { 
Integer.toString(ch2, 16) },
  +                                                       
XMLErrorReporter.SEVERITY_FATAL_ERROR);
  +                        }
  +                    }
  +                    else {
                           fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                                      "InvalidCharInContent", 
  -                                                   new Object[] { 
Integer.toString(sup, 16) },
  +                                                   new Object[] { 
Integer.toString(ch, 16) },
                                                      
XMLErrorReporter.SEVERITY_FATAL_ERROR);
  -                        continue;
  -                    }                 
  -                    buffer.append((char) ch);
  -                    buffer.append((char) ch2);
  -                }
  -                else {
  -                    fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  -                                               "InvalidCharInContent", 
  -                                               new Object[] { Integer.toString(ch, 
16) },
  -                                               
XMLErrorReporter.SEVERITY_FATAL_ERROR);
  +                    }
                   }
               }
  -            else {
  -                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
  -                                           "InvalidCharInContent", 
  -                                           new Object[] { Integer.toString(ch, 16) 
},
  -                                           XMLErrorReporter.SEVERITY_FATAL_ERROR);
  +            if (fHandler != null && readSize > 0) {
  +                fTempString.offset = 0;
  +                fTempString.length = readSize;
  +                fHandler.characters(
  +                    fTempString,
  +                    fHandler.modifyAugmentations(null, true));
               }
  +            readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1);
           }
  -        if (fHandler != null && buffer.length > 0) {
  -            fHandler.characters(
  -                buffer,
  -                fHandler.modifyAugmentations(null, true));
  -        }
  +        
  +    }
  +    
  +    /**
  +     * Sets the input source on this text reader.
  +     * 
  +     * @param source The XMLInputSource to use.
  +     */
  +    public void setInputSource(XMLInputSource source) {
  +        fSource = source;
       }
       
       /**
  @@ -466,6 +485,7 @@
       public void close() throws IOException {
           if (fReader != null) {
               fReader.close();
  +            fReader = null;
           }
       }
       
  @@ -478,4 +498,17 @@
       protected boolean isValid(int ch) {
           return XMLChar.isValid(ch);
       }
  +    
  +    /**
  +     * Sets the buffer size property for the reader which decides the chunk sizes 
that are parsed
  +     * by the reader at a time and passed to the handler
  +     * 
  +     * @param bufferSize The size of the buffer desired
  +     */
  +    protected void setBufferSize(int bufferSize) {
  +     if (fTempString.ch.length != ++bufferSize) {
  +             fTempString.ch = new char[bufferSize];
  +        }
  +    }
  + 
   }
  
  
  
  1.3       +4 -3      
xml-xerces/java/src/org/apache/xerces/xinclude/XInclude11TextReader.java
  
  Index: XInclude11TextReader.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xerces/java/src/org/apache/xerces/xinclude/XInclude11TextReader.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- XInclude11TextReader.java 24 Feb 2004 23:15:52 -0000      1.2
  +++ XInclude11TextReader.java 18 Jul 2004 19:57:42 -0000      1.3
  @@ -40,10 +40,11 @@
        *
        * @param source The XMLInputSource to use.
        * @param handler The XIncludeHandler to use.
  +     * @param bufferSize The size of this text reader's buffer.
        */
  -    public XInclude11TextReader(XMLInputSource source, XIncludeHandler handler)
  +    public XInclude11TextReader(XMLInputSource source, XIncludeHandler handler, int 
bufferSize)
           throws IOException {
  -        super(source, handler);
  +        super(source, handler, bufferSize);
       }
       
       /**
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to