jkesselm    01/12/13 13:36:35

  Modified:    java/src/org/apache/xml/utils FastStringBuffer.java
  Log:
  Part 2b of 2: Reducing initial space requirements of a DTM. This may
  involve a slight performance hit in some circumstances due to
  smaller and more complex subdivision of FastStringBuffers, but
  should help stylesheets which generate a lot of Result Tree
  Fragments.
  
  (Leveraging FSB's features more strongly exposed a bug in
  whitespace normalization, which I've fixed.)
  
  Note that a better long-term answer will be to reduce the number of
  DTMs tied up as RTFs.
  
  Revision  Changes    Path
  1.15      +133 -20   
xml-xalan/java/src/org/apache/xml/utils/FastStringBuffer.java
  
  Index: FastStringBuffer.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xalan/java/src/org/apache/xml/utils/FastStringBuffer.java,v
  retrieving revision 1.14
  retrieving revision 1.15
  diff -u -r1.14 -r1.15
  --- FastStringBuffer.java     2001/10/05 15:50:11     1.14
  +++ FastStringBuffer.java     2001/12/13 21:36:35     1.15
  @@ -85,7 +85,29 @@
     // If nonzero, forces the inial chunk size.
     /**/static final int DEBUG_FORCE_INIT_BITS=0;
   
  -  /**
  +     /** Manefest constant: Suppress leading whitespace.
  +      * This should be used when normalize-to-SAX is called for the first 
chunk of a
  +      * multi-chunk output, or one following unsuppressed whitespace in a 
previous
  +      * chunk.
  +      * @see 
sendNormalizedSAXcharacters(char[],int,int,org.xml.sax.ContentHandler,int)
  +      */
  +     public static final int SUPPRESS_LEADING_WS=0x01;
  +     
  +     /** Manefest constant: Suppress trailing whitespace.
  +      * This should be used when normalize-to-SAX is called for the last 
chunk of a
  +      * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
  +      */
  +     public static final int SUPPRESS_TRAILING_WS=0x02;
  +     
  +     /** Manefest constant: Suppress both leading and trailing whitespace.
  +      * This should be used when normalize-to-SAX is called for a complete 
string.
  +      * (I'm not wild about the name of this one. Ideas welcome.)
  +      * @see 
sendNormalizedSAXcharacters(char[],int,int,org.xml.sax.ContentHandler,int)
  +      */
  +     public static final int SUPPRESS_BOTH
  +             = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
  +
  +     /**
      * Field m_chunkBits sets our chunking strategy, by saying how many
      * bits of index can be used within a single chunk before flowing over
      * to the next chunk. For example, if m_chunkbits is set to 15, each
  @@ -983,13 +1005,31 @@
      * @param ch SAX ContentHandler object to receive the event.
      * @param start Offset of first character in the range.
      * @param length Number of characters to send.
  +   * @return normalization status to apply to next chunk (because we may
  +   * have been called recursively to process an inner FSB):
  +   * <dl>
  +   * <dt>0</dt>
  +   * <dd>if this output did not end in retained whitespace, and thus 
whitespace
  +   * at the start of the following chunk (if any) should be converted to a
  +   * single space.
  +   * <dt>SUPPRESS_LEADING_WS</dt>
  +   * <dd>if this output ended in retained whitespace, and thus whitespace
  +   * at the start of the following chunk (if any) should be completely
  +   * suppressed.</dd>
  +   * </dd>
  +   * </dl>
      * @exception org.xml.sax.SAXException may be thrown by handler's
      * characters() method.
      */
  -  public void sendNormalizedSAXcharacters(
  +  public int sendNormalizedSAXcharacters(
             org.xml.sax.ContentHandler ch, int start, int length)
               throws org.xml.sax.SAXException
     {
  +       // This call always starts at the beginning of the 
  +    // string being written out, either because it was called directly or
  +    // because it was an m_innerFSB recursion. This is important since
  +             // it gives us a well-known initial state for this flag:
  +             int stateForNextChunk=SUPPRESS_LEADING_WS;
   
       int stop = start + length;
       int startChunk = start >>> m_chunkBits;
  @@ -1000,51 +1040,97 @@
       for (int i = startChunk; i < stopChunk; ++i)
       {
         if (i == 0 && m_innerFSB != null)
  +                             stateForNextChunk=
           m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
                                        m_chunkSize - startColumn);
         else
  +                             stateForNextChunk=
           sendNormalizedSAXcharacters(m_array[i], startColumn, 
  -                                    m_chunkSize - startColumn, ch);
  +                                    m_chunkSize - startColumn, 
  +                                                                             
                                                                
ch,stateForNextChunk);
   
         startColumn = 0;  // after first chunk
       }
   
       // Last, or only, chunk
       if (stopChunk == 0 && m_innerFSB != null)
  +                     stateForNextChunk= // %REVIEW% Is this update really 
needed?
         m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - 
startColumn);
       else if (stopColumn > startColumn)
       {
  -      sendNormalizedSAXcharacters(m_array[stopChunk], startColumn,
  -                    stopColumn - startColumn, ch);
  +                     stateForNextChunk= // %REVIEW% Is this update really 
needed?
  +      sendNormalizedSAXcharacters(m_array[stopChunk], 
  +                                                                             
                                                        startColumn, stopColumn 
- startColumn,
  +                                                                             
                                                        ch, stateForNextChunk | 
SUPPRESS_TRAILING_WS);
       }
  +             return stateForNextChunk;
     }
     
     static char[] m_oneChar = {' '};
  -  
  +       
     /**
  -   * Directly normalize and dispatch the character array.
  +   * Internal method to directly normalize and dispatch the character array.
  +   * This version is aware of the fact that it may be called several times
  +   * in succession if the data is made up of multiple "chunks", and thus
  +   * must actively manage the handling of leading and trailing whitespace.
      *
      * @param ch The characters from the XML document.
      * @param start The start position in the array.
      * @param length The number of characters to read from the array.
  -   * 
  +   * @param handler SAX ContentHandler object to receive the event.
  +   * @param edgeTreatmentFlags How leading/trailing spaces should be 
handled. 
  +   * This is a bitfield contining two flags, bitwise-ORed together:
  +   * <dl>
  +   * <dt>SUPPRESS_LEADING_WS</dt>
  +   * <dd>When false, causes leading whitespace to be converted to a single
  +   * space; when true, causes it to be discarded entirely.
  +   * Should be set TRUE for the first chunk, and (in multi-chunk output)
  +   * whenever the previous chunk ended in retained whitespace.</dd>
  +   * <dt>SUPPRESS_TRAILING_WS</dt>
  +   * <dd>When false, causes trailing whitespace to be converted to a single
  +   * space; when true, causes it to be discarded entirely.
  +   * Should be set TRUE for the last or only chunk.
  +   * </dd>
  +   * </dl>
  +   * @return normalization status, as in the edgeTreatmentFlags parameter:
  +   * <dl>
  +   * <dt>0</dt>
  +   * <dd>if this output did not end in retained whitespace, and thus 
whitespace
  +   * at the start of the following chunk (if any) should be converted to a
  +   * single space.
  +   * <dt>SUPPRESS_LEADING_WS</dt>
  +   * <dd>if this output ended in retained whitespace, and thus whitespace
  +   * at the start of the following chunk (if any) should be completely
  +   * suppressed.</dd>
  +   * </dd>
  +   * </dl>
      * @exception org.xml.sax.SAXException Any SAX exception, possibly
      *            wrapping another exception.
      */
  -  public static void sendNormalizedSAXcharacters(char ch[], 
  +  static int sendNormalizedSAXcharacters(char ch[], 
                int start, int length, 
  -             org.xml.sax.ContentHandler handler)
  +             org.xml.sax.ContentHandler handler,
  +                                              int edgeTreatmentFlags)
             throws org.xml.sax.SAXException
     {
  +             int stateForNextChunk=0; // Initially, assume no retained 
trailing spaces.
  +             
       int end = length + start;
  -    int s;
  -    for (s = start; s < end; s++)
  -    {
  -      char c = ch[s];
  -      if(!XMLCharacterRecognizer.isWhiteSpace(c))
  -        break;
  -    }
  +    int s=start;
  +             
  +             // Leading whitespaces should be _completely_ suppressed if and 
only if
  +             // (a) we're the first chunk in the normalized sequence or (b) 
the
  +             // previous chunk ended in a normalized-but-not-suppressed 
whitespace.
  +             if(0!= (edgeTreatmentFlags&SUPPRESS_LEADING_WS) )
  +                     for (; s < end; s++)
  +                     {
  +                             char c = ch[s];
  +                             if(!XMLCharacterRecognizer.isWhiteSpace(c))
  +                                     break;
  +                     }
   
  +             // Normal processing converts multiple whitespace characters 
into
  +             // a single whitespace
       boolean whiteSpaceFound = false;
       boolean needToFlushSpace = false;
       int d = s;
  @@ -1115,15 +1201,42 @@
       
       int len = (s-d);
       
  -    if(len > 0)
  +             // If we aren't at the end of the (possibly multi-chunk) text,
  +             // we should ouput the single space even if there is nothing
  +             // following it in this chunk
  +    if(len > 0 || 0==(edgeTreatmentFlags&SUPPRESS_TRAILING_WS) )
       {
         if(needToFlushSpace)
  -        handler.characters(m_oneChar, 0, 1);
  -      handler.characters(ch, d, len);
  +        handler.characters(m_oneChar, 0, 1); // Output single space
  +                     if(len>0)
  +                             handler.characters(ch, d, len);
  +                     else
  +                             stateForNextChunk=SUPPRESS_LEADING_WS;
       }
  +             
  +             return stateForNextChunk;
     }
     
     /**
  +   * Directly normalize and dispatch the character array.
  +   *
  +   * @param ch The characters from the XML document.
  +   * @param start The start position in the array.
  +   * @param length The number of characters to read from the array.
  +   * @param handler SAX ContentHandler object to receive the event.
  +   * @exception org.xml.sax.SAXException Any SAX exception, possibly
  +   *            wrapping another exception.
  +   */
  +  public static void sendNormalizedSAXcharacters(char ch[], 
  +             int start, int length, 
  +             org.xml.sax.ContentHandler handler)
  +          throws org.xml.sax.SAXException
  +  {
  +             sendNormalizedSAXcharacters(ch, start, length, 
  +             handler, SUPPRESS_BOTH);
  +     }
  +             
  +     /**
      * Sends the specified range of characters as sax Comment.
      * <p>
      * Note that, unlike sendSAXcharacters, this has to be done as a single 
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to