jkesselm 01/12/13 13:36:35
Modified: java/src/org/apache/xml/utils FastStringBuffer.java
Log:
Part 2b of 2: Reducing initial space requirements of a DTM. This may
involve a slight performance hit in some circumstances due to
smaller and more complex subdivision of FastStringBuffers, but
should help stylesheets which generate a lot of Result Tree
Fragments.
(Leveraging FSB's features more strongly exposed a bug in
whitespace normalization, which I've fixed.)
Note that a better long-term answer will be to reduce the number of
DTMs tied up as RTFs.
Revision Changes Path
1.15 +133 -20
xml-xalan/java/src/org/apache/xml/utils/FastStringBuffer.java
Index: FastStringBuffer.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xml/utils/FastStringBuffer.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- FastStringBuffer.java 2001/10/05 15:50:11 1.14
+++ FastStringBuffer.java 2001/12/13 21:36:35 1.15
@@ -85,7 +85,29 @@
// If nonzero, forces the initial chunk size.
/**/static final int DEBUG_FORCE_INIT_BITS=0;
- /**
+ /** Manifest constant: Suppress leading whitespace.
+ * This should be used when normalize-to-SAX is called for the first chunk of a
+ * multi-chunk output, or one following unsuppressed whitespace in a previous
+ * chunk.
+ * @see sendNormalizedSAXcharacters(char[],int,int,org.xml.sax.ContentHandler,int)
+ */
+ public static final int SUPPRESS_LEADING_WS=0x01;
+
+ /** Manifest constant: Suppress trailing whitespace.
+ * This should be used when normalize-to-SAX is called for the last chunk of a
+ * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
+ */
+ public static final int SUPPRESS_TRAILING_WS=0x02;
+
+ /** Manifest constant: Suppress both leading and trailing whitespace.
+ * This should be used when normalize-to-SAX is called for a complete string.
+ * (I'm not wild about the name of this one. Ideas welcome.)
+ * @see sendNormalizedSAXcharacters(char[],int,int,org.xml.sax.ContentHandler,int)
+ */
+ public static final int SUPPRESS_BOTH
+ = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
+
+ /**
* Field m_chunkBits sets our chunking strategy, by saying how many
* bits of index can be used within a single chunk before flowing over
* to the next chunk. For example, if m_chunkbits is set to 15, each
@@ -983,13 +1005,31 @@
* @param ch SAX ContentHandler object to receive the event.
* @param start Offset of first character in the range.
* @param length Number of characters to send.
+ * @return normalization status to apply to next chunk (because we may
+ * have been called recursively to process an inner FSB):
+ * <dl>
+ * <dt>0</dt>
+ * <dd>if this output did not end in retained whitespace, and thus whitespace
+ * at the start of the following chunk (if any) should be converted to a
+ * single space.</dd>
+ * <dt>SUPPRESS_LEADING_WS</dt>
+ * <dd>if this output ended in retained whitespace, and thus whitespace
+ * at the start of the following chunk (if any) should be completely
+ * suppressed.</dd>
+ * </dl>
* @exception org.xml.sax.SAXException may be thrown by handler's
* characters() method.
*/
- public void sendNormalizedSAXcharacters(
+ public int sendNormalizedSAXcharacters(
org.xml.sax.ContentHandler ch, int start, int length)
throws org.xml.sax.SAXException
{
+ // This call always starts at the beginning of the
+ // string being written out, either because it was called directly or
+ // because it was an m_innerFSB recursion. This is important since
+ // it gives us a well-known initial state for this flag:
+ int stateForNextChunk=SUPPRESS_LEADING_WS;
int stop = start + length;
int startChunk = start >>> m_chunkBits;
@@ -1000,51 +1040,97 @@
for (int i = startChunk; i < stopChunk; ++i)
{
if (i == 0 && m_innerFSB != null)
+ stateForNextChunk=
m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
m_chunkSize - startColumn);
else
+ stateForNextChunk=
sendNormalizedSAXcharacters(m_array[i], startColumn,
- m_chunkSize - startColumn, ch);
+ m_chunkSize - startColumn,
+
ch,stateForNextChunk);
startColumn = 0; // after first chunk
}
// Last, or only, chunk
if (stopChunk == 0 && m_innerFSB != null)
+ stateForNextChunk= // %REVIEW% Is this update really
needed?
m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn -
startColumn);
else if (stopColumn > startColumn)
{
- sendNormalizedSAXcharacters(m_array[stopChunk], startColumn,
- stopColumn - startColumn, ch);
+ stateForNextChunk= // %REVIEW% Is this update really
needed?
+ sendNormalizedSAXcharacters(m_array[stopChunk],
+
startColumn, stopColumn
- startColumn,
+
ch, stateForNextChunk |
SUPPRESS_TRAILING_WS);
}
+ return stateForNextChunk;
}
static char[] m_oneChar = {' '};
-
+
/**
- * Directly normalize and dispatch the character array.
+ * Internal method to directly normalize and dispatch the character array.
+ * This version is aware of the fact that it may be called several times
+ * in succession if the data is made up of multiple "chunks", and thus
+ * must actively manage the handling of leading and trailing whitespace.
*
* @param ch The characters from the XML document.
* @param start The start position in the array.
* @param length The number of characters to read from the array.
- *
+ * @param handler SAX ContentHandler object to receive the event.
+ * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
+ * This is a bitfield containing two flags, bitwise-ORed together:
+ * <dl>
+ * <dt>SUPPRESS_LEADING_WS</dt>
+ * <dd>When false, causes leading whitespace to be converted to a single
+ * space; when true, causes it to be discarded entirely.
+ * Should be set TRUE for the first chunk, and (in multi-chunk output)
+ * whenever the previous chunk ended in retained whitespace.</dd>
+ * <dt>SUPPRESS_TRAILING_WS</dt>
+ * <dd>When false, causes trailing whitespace to be converted to a single
+ * space; when true, causes it to be discarded entirely.
+ * Should be set TRUE for the last or only chunk.
+ * </dd>
+ * </dl>
+ * @return normalization status, as in the edgeTreatmentFlags parameter:
+ * <dl>
+ * <dt>0</dt>
+ * <dd>if this output did not end in retained whitespace, and thus whitespace
+ * at the start of the following chunk (if any) should be converted to a
+ * single space.</dd>
+ * <dt>SUPPRESS_LEADING_WS</dt>
+ * <dd>if this output ended in retained whitespace, and thus whitespace
+ * at the start of the following chunk (if any) should be completely
+ * suppressed.</dd>
+ * </dl>
* @exception org.xml.sax.SAXException Any SAX exception, possibly
* wrapping another exception.
*/
- public static void sendNormalizedSAXcharacters(char ch[],
+ static int sendNormalizedSAXcharacters(char ch[],
int start, int length,
- org.xml.sax.ContentHandler handler)
+ org.xml.sax.ContentHandler handler,
+ int edgeTreatmentFlags)
throws org.xml.sax.SAXException
{
+ int stateForNextChunk=0; // Initially, assume no retained
trailing spaces.
+
int end = length + start;
- int s;
- for (s = start; s < end; s++)
- {
- char c = ch[s];
- if(!XMLCharacterRecognizer.isWhiteSpace(c))
- break;
- }
+ int s=start;
+
+ // Leading whitespaces should be _completely_ suppressed if and only if
+ // (a) we're the first chunk in the normalized sequence or (b) the
+ // previous chunk ended in a normalized-but-not-suppressed whitespace.
+ if(0!= (edgeTreatmentFlags&SUPPRESS_LEADING_WS) )
+ for (; s < end; s++)
+ {
+ char c = ch[s];
+ if(!XMLCharacterRecognizer.isWhiteSpace(c))
+ break;
+ }
+ // Normal processing converts multiple whitespace characters into
+ // a single whitespace
boolean whiteSpaceFound = false;
boolean needToFlushSpace = false;
int d = s;
@@ -1115,15 +1201,42 @@
int len = (s-d);
- if(len > 0)
+ // If we aren't at the end of the (possibly multi-chunk) text,
+ // we should output the single space even if there is nothing
+ // following it in this chunk
+ if(len > 0 || 0==(edgeTreatmentFlags&SUPPRESS_TRAILING_WS) )
{
if(needToFlushSpace)
- handler.characters(m_oneChar, 0, 1);
- handler.characters(ch, d, len);
+ handler.characters(m_oneChar, 0, 1); // Output single space
+ if(len>0)
+ handler.characters(ch, d, len);
+ else
+ stateForNextChunk=SUPPRESS_LEADING_WS;
}
+
+ return stateForNextChunk;
}
/**
+ * Directly normalize and dispatch the character array.
+ *
+ * @param ch The characters from the XML document.
+ * @param start The start position in the array.
+ * @param length The number of characters to read from the array.
+ * @param handler SAX ContentHandler object to receive the event.
+ * @exception org.xml.sax.SAXException Any SAX exception, possibly
+ * wrapping another exception.
+ */
+ public static void sendNormalizedSAXcharacters(char ch[],
+ int start, int length,
+ org.xml.sax.ContentHandler handler)
+ throws org.xml.sax.SAXException
+ {
+ sendNormalizedSAXcharacters(ch, start, length,
+ handler, SUPPRESS_BOTH);
+ }
+
+ /**
* Sends the specified range of characters as sax Comment.
* <p>
* Note that, unlike sendSAXcharacters, this has to be done as a single
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]