jkesselm 01/03/05 11:55:01
Modified: java/src/org/apache/xalan/client XSLTProcessorApplet.java
java/src/org/apache/xalan/processor StylesheetHandler.java
TransformerFactoryImpl.java
java/src/org/apache/xalan/stree DocImpl.java
StreeDOMBuilder.java TextImpl.java
java/src/org/apache/xalan/templates OutputProperties.java
java/src/org/apache/xalan/xslt Process.java
java/src/org/apache/xml/utils FastStringBuffer.java
java/src/trax trax.properties
Log:
New approach to FastStringBuffer, and changes needed to support it.
We still need to do some serious work on both the initial values of the
tuning parameters and the heuristics driving the "chunk growth" algorithms.
Currently set for fixed-sized chunks of 8Kch in main tree, .5Kch in RTF.
Revision Changes Path
1.16 +1 -1
xml-xalan/java/src/org/apache/xalan/client/XSLTProcessorApplet.java
Index: XSLTProcessorApplet.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/client/XSLTProcessorApplet.java,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- XSLTProcessorApplet.java 2001/01/02 03:36:32 1.15
+++ XSLTProcessorApplet.java 2001/03/05 19:54:40 1.16
@@ -718,7 +718,7 @@
e.printStackTrace();
System.exit(-1);
}
-
+
this.showStatus("Transformation Done!");
htmlData = osw.toString();
}
1.36 +4 -2
xml-xalan/java/src/org/apache/xalan/processor/StylesheetHandler.java
Index: StylesheetHandler.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/processor/StylesheetHandler.java,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -r1.35 -r1.36
--- StylesheetHandler.java 2001/02/01 04:29:46 1.35
+++ StylesheetHandler.java 2001/03/05 19:54:42 1.36
@@ -457,8 +457,10 @@
pushSpaceHandling(false);
}
-
- /** support for isParsingComplete */
+ /** m_parsingComplete becomes true when the top-level stylesheet and all
+ * its included/imported stylesheets have been been fully parsed, as an
+ * indication that composition/optimization/compilation can begin.
+ * @see isStylesheetParsingComplete */
private boolean m_parsingComplete = false;
/**
1.25 +2 -1
xml-xalan/java/src/org/apache/xalan/processor/TransformerFactoryImpl.java
Index: TransformerFactoryImpl.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/processor/TransformerFactoryImpl.java,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -r1.24 -r1.25
--- TransformerFactoryImpl.java 2001/02/12 19:31:14 1.24
+++ TransformerFactoryImpl.java 2001/03/05 19:54:43 1.25
@@ -764,7 +764,8 @@
factory.setNamespaceAware(true);
- javax.xml.parsers.SAXParser jaxpParser = factory.newSAXParser();
+
+ javax.xml.parsers.SAXParser jaxpParser =
factory.newSAXParser();
reader = jaxpParser.getXMLReader();
}
1.6 +2 -1 xml-xalan/java/src/org/apache/xalan/stree/DocImpl.java
Index: DocImpl.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/stree/DocImpl.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- DocImpl.java 2001/01/29 11:49:03 1.5
+++ DocImpl.java 2001/03/05 19:54:47 1.6
@@ -95,7 +95,8 @@
public DocImpl()
{
super(null);
- m_chars = new FastStringBuffer(1024 * 8);
+ // Just an initial guess at reasonable tuning parameters
+ m_chars = new FastStringBuffer(13,13);
// m_id = m_idCount++;
}
1.15 +2 -3
xml-xalan/java/src/org/apache/xalan/stree/StreeDOMBuilder.java
Index: StreeDOMBuilder.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/stree/StreeDOMBuilder.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- StreeDOMBuilder.java 2001/01/11 18:40:09 1.14
+++ StreeDOMBuilder.java 2001/03/05 19:54:48 1.15
@@ -387,9 +387,8 @@
if (m_previousIsText && !isText)
{
- if (!(m_docImpl.m_sourceTreeHandler.getShouldStripWhitespace()
- &&
XMLCharacterRecognizer.isWhiteSpace(m_text_buffer.m_doc.m_chars.m_map,
- m_text_buffer.m_start, m_text_buffer.m_length)))
+ if (!( m_docImpl.m_sourceTreeHandler.getShouldStripWhitespace()
+ && m_text_buffer.isWhitespace() ))
{
append(m_text_buffer);
}
1.13 +14 -8 xml-xalan/java/src/org/apache/xalan/stree/TextImpl.java
Index: TextImpl.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/stree/TextImpl.java,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- TextImpl.java 2001/01/07 03:59:23 1.12
+++ TextImpl.java 2001/03/05 19:54:49 1.13
@@ -91,7 +91,7 @@
* @return the length of text in the text buffer. */
public int getLengthInBuffer()
{
- return m_start;
+ return m_length;
}
/**
@@ -130,15 +130,16 @@
synchronized(fsb)
{
- m_start = fsb.m_firstFree;
+ m_start = fsb.length(); // Isolate from internal representation
m_length = length;
fsb.append(ch, start, length);
}
}
/**
- * Handle a Characters event
- *
+ * Send content of this text node as a SAX Characters event. Handles both
+ * the case where the Text node has stored its data as a local string and
+ * that in which the data has been placed in a FastStringBuffer pool.
*
* @param ch Content handler to handle SAX events
*
@@ -153,8 +154,8 @@
else
{
synchronized(m_doc.m_chars)
- {
- ch.characters(m_doc.m_chars.m_map, m_start, m_length);
+ {
+ m_doc.m_chars.sendSAXcharacters(ch,m_start,m_length);
}
}
}
@@ -211,7 +212,7 @@
{
synchronized(m_doc.m_chars)
{
- m_data = new String(m_doc.m_chars.m_map, m_start, m_length);
+ m_data = m_doc.m_chars.getString(m_start, m_length);
}
}
@@ -242,7 +243,7 @@
{
synchronized(m_doc.m_chars)
{
- m_data = new String(m_doc.m_chars.m_map, m_start, m_length);
+ m_data = m_doc.m_chars.getString(m_start, m_length);
}
}
@@ -272,6 +273,11 @@
m_data.concat(ch.toString());
m_length += length;
+ }
+
+ public boolean isWhitespace()
+ {
+ return m_doc.m_chars.isWhitespace(m_start,m_length);
}
}
1.11 +3 -1
xml-xalan/java/src/org/apache/xalan/templates/OutputProperties.java
Index: OutputProperties.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/templates/OutputProperties.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- OutputProperties.java 2001/01/02 03:36:47 1.10
+++ OutputProperties.java 2001/03/05 19:54:53 1.11
@@ -666,7 +666,9 @@
{
int s = v.size();
- FastStringBuffer fsb = new FastStringBuffer();
+
+ // Just an initial guess at reasonable tuning parameters
+ FastStringBuffer fsb = new FastStringBuffer(9,9);
for (int i = 0; i < s; i++)
{
1.28 +1 -0 xml-xalan/java/src/org/apache/xalan/xslt/Process.java
Index: Process.java
===================================================================
RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xslt/Process.java,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- Process.java 2001/02/28 16:44:58 1.27
+++ Process.java 2001/03/05 19:54:55 1.28
@@ -636,4 +636,5 @@
diagnosticsWriter.println(""); //"Xalan: done");
}
}
+
}
1.5 +588 -106
xml-xalan/java/src/org/apache/xml/utils/FastStringBuffer.java
Index: FastStringBuffer.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xml/utils/FastStringBuffer.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- FastStringBuffer.java 2000/12/12 19:10:24 1.4
+++ FastStringBuffer.java 2001/03/05 19:54:58 1.5
@@ -2,7 +2,7 @@
* The Apache Software License, Version 1.1
*
*
- * Copyright (c) 1999 The Apache Software Foundation. All rights
+ * Copyright (c) 1999 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -61,58 +61,157 @@
* parameter range checking, exposed fields. Note that in typical
* applications, thread-safety of a StringBuffer is a somewhat
* dubious concept in any case.
- */
+ * <p>
+ * Note that Stree is using a single FastStringBuffer as a string pool,
+ * by recording start and length indices within a single buffer. This
+ * minimizes heap overhead, but of course requires more work when retrieving
+ * the data.
+ * <p>
+ * This has been recoded to operate as a "chunked buffer". Doing so
+ * reduces (or, when initial chunk size equals max chunk size,
+ * eliminates) the need to recopy existing information when an append
+ * exceeds the space available; we just allocate another chunk and
+ * flow across to it. (The array of chunks may need to grow,
+ * admittedly, but that's a much smaller object.) Some excess
+ * recopying may arise when we extract Strings which cross chunk
+ * boundaries; larger chunks make that less frequent. <p> The size
+ * values are parameterized, to allow tuning this code. In theory,
+ * RTFs might want to be tuned differently from the main document's
+ * text.
+ * <p>
+ * STATUS: I'm not getting as much performance gain out of this as I'd
+ * hoped, nor is the relationship between the tuning parameters and
+ * performance particularly intuitive. Under some conditions I do seem
+ * to be able to knock up to 19% off the execution time for our
+ * largest testcases, which is certainly nontrivial... but that same
+ * setting (fixed 1024-byte chunking for both main tree and RTFs)
+ * seems to _add_ that much proportional overhead to some of the
+ * smaller ones. We need to understand this better, improve the
+ * parameter selection/growth heuristics, and perhaps (if all else
+ * fails) consider exposing those parameters for advanced users to
+ * fiddle with as suits their needs. */
public class FastStringBuffer
{
+ /** Field m_chunkBits sets our chunking strategy, by saying how many
+ * bits of index can be used within a single chunk before flowing over
+ * to the next chunk. For example, if m_chunkbits is set to 15, each
+ * chunk can contain up to 2^15 (32K) characters */
+ int m_chunkBits=15;
+
+ /** Field m_chunkSize establishes the maximum size of one chunk of the
array
+ * as 2**chunkbits characters.
+ * (Which may also be the minimum size if we aren't tuning for storage) */
+ int m_chunkSize; // =1<<(m_chunkBits-1);
+
+ /** Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
+ * worth of low-order '1' bits, useful for shift-and-mask addressing
+ * within the chunks. */
+ int m_chunkMask; // =m_chunkSize-1;
+
+ /** Field m_chunkAllocationUnit establishes the initial allocation size for
+ * a chunk, and the amount by which the chunk is enlarged. This value is
+ * automatically increased for each chunk -- that is, if the first chunk's
+ * allocation unit is 1024 characters, the next may allocate in units of
+ * 2048 characters, and so on. The intent is that small documents (such as
+ * Result Tree Fragments) will minimize memory use at the expense of
spending
+ * more time recopying data, while large ones will minimize recopying at
+ * the expense of wasting more storage due to overallocation.
+ * <p>
+ * The best compromise between block size, allocation unit size, and
+ * rate of growth of allocation units is still to be determined.
+ */
+ int m_chunkAllocationUnit=1024;
+
+ /** Field m_array holds the string buffer's text contents, using an
+ * array-of-arrays. Note that this array, and the arrays it contains, may
be
+ * reallocated when necessary in order to allow the buffer to grow;
+ * references to them should be considered to be invalidated after any
+ * append. However, the only time these arrays are directly exposed
+ * is in the sendSAXcharacters call.
+ */
+ char[][] m_array;
+
+ /** Field m_lastChunk is an index into m_array[], pointing to the last
+ * chunk of the Chunked Array currently in use. Note that additional
+ * chunks may actually be allocated, eg if the FastStringBuffer had
+ * previously been truncated or if someone issued an ensureSpace request.
+ * <p>
+ * The insertion point for append operations is addressed by the
combination
+ * of m_lastChunk and m_firstFree.
+ */
+ int m_lastChunk = 0;
- /** Field m_blocksize establishes the allocation granularity -- the
- * initial size of m_map[] and the minimum increment by which it grows
- * when necessary. */
- public int m_blocksize;
-
- /** Field m_map[] is a character array holding the string buffer's
- * contents. Note that this array will be reallocated when necessary
- * in order to allow the buffer to grow, so references to this
- * object may become (indedetectably) invalid after any edit is
- * made to the FastStringBuffer. DO NOT retain such references!
- * (Note that this imposes a multithreading hazard; its the user's
- * responsibility to manage access to FastStringBuffer to prevent
- * the problem from arising.) */
- public char m_map[];
-
- /** Field m_firstFree is an index into m_map[], pointing to the first
- * character in the array which is not part of the FastStringBuffer's
- * current content. Since m_map[] is zero-based, m_firstFree is also
- * equal to the length of that content. */
- public int m_firstFree = 0;
-
- /** Field m_mapSize is a cached copy of m_map.length -- or,
- * transiently, the new length that m_map will grow to. */
- public int m_mapSize;
+ /** Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
+ * the first character in the Chunked Array which is not part of the
+ * FastStringBuffer's current content. Since m_array[][] is zero-based,
+ * the length of that content can be calculated as
+ * (m_lastChunk<<m_chunkBits) + m_firstFree */
+ int m_firstFree = 0;
+ /** Field m_lastChunkSize is a cached copy of m_array[m_lastChunk].length
+ */
+ int m_lastChunkSize; // = m_initialChunkSize;
+
/**
- * Construct a IntVector, using the default block size.
+ * Construct a FastStringBuffer, with allocation policy as per parameters.
+ * <p>
+ * For coding convenience, I've expressed both allocation sizes in terms of
+ * a number of bits. That's needed for the final size of a chunk,
+ * to permit fast and efficient shift-and-mask addressing. It's less
critical
+ * for the inital size, and may be reconsidered.
+ * <p>
+ * An alternative would be to accept integer sizes and round to powers of
two;
+ * that's under consideration.
+ *
+ * @param initialChunkBits Length in characters of the initial allocation
+ * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
+ * characters.) Later chunks will use larger allocation units, to trade off
+ * allocation speed of large document against storage efficiency of small
+ * ones.
+ * @param chunkBits Number of character-offset bits that should be used for
+ * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
+ * characters.
+ */
+ public FastStringBuffer(int initialChunkBits,int chunkBits)
+ {
+ m_array = new char[16][];
+
+ // Don't bite off more than we're prepared to swallow!
+ if(initialChunkBits>chunkBits)
+ initialChunkBits=chunkBits;
+ m_chunkBits=chunkBits;
+ m_chunkSize=1<<(chunkBits);
+ m_chunkMask=m_chunkSize-1;
+
+ m_lastChunkSize=m_chunkAllocationUnit=1<<(initialChunkBits);
+ m_array[0] = new char[m_lastChunkSize];
+ }
+
+ /**
+ * Construct a FastStringBuffer, using a default allocation policy.
*/
public FastStringBuffer()
{
-
- m_blocksize = 1024;
- m_mapSize = 1024;
- m_map = new char[1024];
+ // 10 bits is 1K. 15 bits is 32K. Remember that these are character
+ // counts, so actual memory allocation unit is doubled for UTF-16 chars.
+ //
+ // For reference: In the original FastStringBuffer, we simply
+ // overallocated by blocksize (default 1KB) on each buffer-growth.
+ this(10,15);
}
/**
- * Construct a IntVector, using the given block size.
+ * Construct a FastStringBuffer, using the specified initial unit.
+ * Resembles the previous version of this code.
+ * <p>
+ * ISSUE: Should this be considered initial size, or fixed size?
+ * Now configured as initial.
*
- * @param blocksize Desired value for m_blocksize, establishes both
- * the initial storage allocation and the minimum growth increment.
+ * @param chunkSize Characters per chunk; will round up to power of 2.
*/
- public FastStringBuffer(int blocksize)
+ public FastStringBuffer(int initialAllocationUnit)
{
-
- m_blocksize = blocksize;
- m_mapSize = blocksize;
- m_map = new char[blocksize];
+ this(initialAllocationUnit, 15);
}
/**
@@ -122,7 +221,7 @@
*/
public final int size()
{
- return m_firstFree;
+ return (m_lastChunk<<m_chunkBits) + m_firstFree;
}
/**
@@ -132,7 +231,7 @@
*/
public final int length()
{
- return m_firstFree;
+ return (m_lastChunk<<m_chunkBits) + m_firstFree;
}
/**
@@ -141,6 +240,7 @@
*/
public final void reset()
{
+ m_lastChunk = 0;
m_firstFree = 0;
}
@@ -150,49 +250,37 @@
* operation. It is not protected against negative values, or values
* greater than the amount of storage currently available... and even
* if additional storage does exist, its contents are unpredictable.
- * The only safe use for setLength() is to truncate the FastStringBuffer
+ * The only safe use for our setLength() is to truncate the
FastStringBuffer
* to a shorter string.
+ * <p>
+ * QUERY: Given that this operation will be used relatively rarely,
+ * does it really need to be so highly optimized?
*
- * @param l New length. If l<0 or l>=m_mapSize, this operation will
+ * @param l New length. If l<0 or l>=getLength(), this operation will
* not report an error but future operations will almost certainly fail.
*/
public final void setLength(int l)
- {
- m_firstFree = l;
- }
-
- /**
- * @return the contents of the FastStringBuffer as a standard Java string
- */
- public final String toString()
{
- return new String(m_map, 0, m_firstFree);
+ m_lastChunk = l >>> m_chunkBits;
+ m_firstFree = l & m_chunkMask;
}
/**
- * Ensure that the FastStringBuffer has at least the specified amount
- * of unused space in m_map[]. If necessary, this operation will
- * enlarge the buffer, overallocating by blocksize so we're less
- * likely to immediately need to grow again.
+ * Note that this operation has been somewhat deoptimized by the shift to a
+ * chunked array, as there is no factory method to produce a String object
+ * directly from an array of arrays and hence a double copy is needed.
+ * By using ensureCapacity we hope to minimize the heap overhead of
building
+ * the intermediate StringBuffer.
* <p>
- * NOTE THAT after calling ensureFreeSpace(), previously obtained
- * references to m_map[] may no longer be valid.
- *
- * @param newSize the required amount of "free space".
+ * (It really is a pity that Java didn't design String as a final subclass
+ * of MutableString, rather than having StringBuffer be a separate
hierarchy.
+ * We'd avoid a <strong>lot</strong> of double-buffering.)
+ *
+ * @return the contents of the FastStringBuffer as a standard Java string.
*/
- private final void ensureFreeSpace(int newSize)
+ public final String toString()
{
-
- if ((m_firstFree + newSize) >= m_mapSize)
- {
- m_mapSize += (newSize + m_blocksize);
-
- char newMap[] = new char[m_mapSize];
-
- System.arraycopy(m_map, 0, newMap, 0, m_firstFree + 1);
-
- m_map = newMap;
- }
+ return getString(0,0,(m_lastChunk<<m_chunkBits)+m_firstFree);
}
/**
@@ -200,18 +288,60 @@
* storage if necessary.
* <p>
* NOTE THAT after calling append(), previously obtained
- * references to m_map[] may no longer be valid.
+ * references to m_array[][] may no longer be valid....
+ * though in fact they should be in this instance.
*
* @param value character to be appended.
*/
public final void append(char value)
{
+ char[] chunk;
+
+ // We may have preallocated chunks. If so, all but last should
+ // be at full size.
+ boolean lastchunk=(m_lastChunk+1==m_array.length);
+
+ if(m_firstFree<m_lastChunkSize) // Simplified test single-character-fits
+ chunk=m_array[m_lastChunk];
+
+ else if (m_lastChunkSize<m_chunkSize)
+ {
+ // Grow chunk. Only arises if this is most recent chunk allocated,
+ // as earlier ones will already be at full size.
+ int newsize=m_lastChunkSize+m_chunkAllocationUnit;
+ chunk=new char[newsize];
+ System.arraycopy(m_array[m_lastChunk],0,chunk,0,m_firstFree);
+ m_array[m_lastChunk]=chunk;
+ m_lastChunkSize=newsize;
+ }
+ else
+ {
+ // Extend array?
+ int i=m_array.length;
+ if(m_lastChunk+1==i)
+ {
+ char[][] newarray=new char[i+16][];
+ System.arraycopy(m_array,0,newarray,0,i);
+ m_array=newarray;
+ }
+
+ // Advance one chunk
+ chunk=m_array[++m_lastChunk];
+ if(chunk==null)
+ {
+ // Add a chunk. Allocate bigger pieces this time.
+ if(m_chunkAllocationUnit<m_chunkSize)
+ m_chunkAllocationUnit<<=1;
+ chunk=m_array[m_lastChunk]=new
char[m_lastChunkSize=m_chunkAllocationUnit];
+ }
+ else // Previously allocated
+ m_lastChunkSize=chunk.length;
+
+ m_firstFree=0;
+ }
- ensureFreeSpace(1);
-
- m_map[m_firstFree] = value;
-
- m_firstFree++;
+ // Space exists in the chunk. Append the character.
+ chunk[m_firstFree++]=value;
}
/**
@@ -219,19 +349,75 @@
* growing the storage if necessary.
* <p>
* NOTE THAT after calling append(), previously obtained
- * references to m_map[] may no longer be valid.
+ * references to m_array[] may no longer be valid.
*
* @param value String whose contents are to be appended.
*/
public final void append(String value)
{
-
- int len = value.length();
-
- ensureFreeSpace(len);
- value.getChars(0, len, m_map, m_firstFree);
-
- m_firstFree += len;
+ int strlen=value.length();
+ int copyfrom=0;
+ char[] chunk=m_array[m_lastChunk];
+ int available=m_lastChunkSize-m_firstFree;
+
+ // Repeat while data remains to be copied
+ while(strlen>0)
+ {
+ // Enlarge chunk if necessary/possible (up to maximum)
+ int newsize=(
+ (m_firstFree+strlen // needed space
++m_chunkAllocationUnit-1) // round up
+ &
+ ~(m_chunkAllocationUnit-1)); // round off
+ if(newsize>m_chunkSize) // Not to exceed maximum.
+ newsize=m_chunkSize;
+
+ if(newsize!=m_lastChunkSize) // We're enlarging!
+ {
+ char[] newchunk=new char[newsize];
+ if(chunk!=null)
+ System.arraycopy(chunk,0,newchunk,0,m_firstFree);
+ chunk=m_array[m_lastChunk]=newchunk;
+ m_lastChunkSize=newsize;
+ available=newsize-m_firstFree;
+ }
+
+ // Copy what fits
+ if(available>strlen) available=strlen;
+ value.getChars(copyfrom, available, m_array[m_lastChunk],
m_firstFree);
+ strlen-=available;
+ copyfrom+=available;
+
+ // If there's more left, allocate another chunk and continue
+ if(strlen>0)
+ {
+ // Extend array?
+ int i=m_array.length;
+ if(m_lastChunk+1==i)
+ {
+ char[][] newarray=new char[i+16][];
+ System.arraycopy(m_array,0,newarray,0,i);
+ m_array=newarray;
+ }
+
+ // Advance one chunk
+ chunk=m_array[++m_lastChunk];
+ if(chunk==null)
+ {
+ if(m_chunkAllocationUnit<m_chunkSize)
+ m_chunkAllocationUnit<<=1;
+ available=m_lastChunkSize=m_chunkAllocationUnit;
+ chunk=m_array[m_lastChunk]=new char[m_lastChunkSize];
+ }
+ else
+ available=m_lastChunkSize=chunk.length;
+
+ m_firstFree=0;
+ }
+ }
+
+ // Adjust the insert point in the last chunk, when we've reached it.
+ m_firstFree+=available;
}
/**
@@ -239,19 +425,75 @@
* growing the storage if necessary.
* <p>
* NOTE THAT after calling append(), previously obtained
- * references to m_map[] may no longer be valid.
+ * references to m_array[] may no longer be valid.
*
* @param value StringBuffer whose contents are to be appended.
*/
public final void append(StringBuffer value)
{
-
- int len = value.length();
-
- ensureFreeSpace(len);
- value.getChars(0, len, m_map, m_firstFree);
-
- m_firstFree += len;
+ int strlen=value.length();
+ int copyfrom=0;
+ char[] chunk=m_array[m_lastChunk];
+ int available=m_lastChunkSize-m_firstFree;
+
+ // Repeat while data remains to be copied
+ while(strlen>0)
+ {
+ // Enlarge chunk if necessary/possible (up to maximum)
+ int newsize=(
+ (m_firstFree+strlen // needed space
++m_chunkAllocationUnit-1) // round up
+ &
+ ~(m_chunkAllocationUnit-1)); // round off
+ if(newsize>m_chunkSize) // Not to exceed maximum.
+ newsize=m_chunkSize;
+
+ if(newsize!=m_lastChunkSize) // We're enlarging!
+ {
+ char[] newchunk=new char[newsize];
+ if(chunk!=null)
+ System.arraycopy(chunk,0,newchunk,0,m_firstFree);
+ chunk=m_array[m_lastChunk]=newchunk;
+ m_lastChunkSize=newsize;
+ available=newsize-m_firstFree;
+ }
+
+ // Copy what fits
+ if(available>strlen) available=strlen;
+ value.getChars(copyfrom, available, m_array[m_lastChunk],
m_firstFree);
+ strlen-=available;
+ copyfrom+=available;
+
+ // If there's more left, allocate another chunk and continue
+ if(strlen>0)
+ {
+ // Extend array?
+ int i=m_array.length;
+ if(m_lastChunk+1==i)
+ {
+ char[][] newarray=new char[i+16][];
+ System.arraycopy(m_array,0,newarray,0,i);
+ m_array=newarray;
+ }
+
+ // Advance one chunk
+ chunk=m_array[++m_lastChunk];
+ if(chunk==null)
+ {
+ if(m_chunkAllocationUnit<m_chunkSize)
+ m_chunkAllocationUnit<<=1;
+ available=m_lastChunkSize=m_chunkAllocationUnit;
+ chunk=m_array[m_lastChunk]=new char[m_lastChunkSize];
+ }
+ else
+ available=m_lastChunkSize=chunk.length;
+
+ m_firstFree=0;
+ }
+ }
+
+ // Adjust the insert point in the last chunk, when we've reached it.
+ m_firstFree+=available;
}
/**
@@ -259,7 +501,7 @@
* FastStringBuffer, growing the storage if necessary.
* <p>
* NOTE THAT after calling append(), previously obtained
- * references to m_map[] may no longer be valid.
+ * references to m_array[] may no longer be valid.
*
* @param chars character array from which data is to be copied
* @param start offset in chars of first character to be copied,
@@ -268,11 +510,69 @@
*/
public final void append(char[] chars, int start, int length)
{
-
- ensureFreeSpace(length);
- System.arraycopy(chars, start, m_map, m_firstFree, length);
-
- m_firstFree += length;
+ int strlen=length;
+ int copyfrom=start;
+ char[] chunk=m_array[m_lastChunk];
+ int available=m_lastChunkSize-m_firstFree;
+
+ // Repeat while data remains to be copied
+ while(strlen>0)
+ {
+ // Enlarge chunk if necessary/possible (up to maximum) *****
+ int newsize=(
+ (m_firstFree+strlen // needed space
++m_chunkAllocationUnit-1) // round up
+ &
+ ~(m_chunkAllocationUnit-1)); // round off
+ if(newsize>m_chunkSize) // Not to exceed maximum.
+ newsize=m_chunkSize;
+
+ if(newsize!=m_lastChunkSize) // We're enlarging!
+ {
+ char[] newchunk=new char[newsize];
+ if(chunk!=null)
+ System.arraycopy(chunk,0,newchunk,0,m_firstFree);
+ chunk=m_array[m_lastChunk]=newchunk;
+ m_lastChunkSize=newsize;
+ available=newsize-m_firstFree;
+ }
+
+ // Copy what fits
+ if(available>strlen) available=strlen;
+ System.arraycopy(chars,copyfrom, m_array[m_lastChunk], m_firstFree,
available);
+ strlen-=available;
+ copyfrom+=available;
+
+ // If there's more left, allocate another chunk and continue
+ if(strlen>0)
+ {
+ // Extend array?
+ int i=m_array.length;
+ if(m_lastChunk+1==i)
+ {
+ char[][] newarray=new char[i+16][];
+ System.arraycopy(m_array,0,newarray,0,i);
+ m_array=newarray;
+ }
+
+ // Advance one chunk
+ chunk=m_array[++m_lastChunk];
+ if(chunk==null)
+ {
+ if(m_chunkAllocationUnit<m_chunkSize)
+ m_chunkAllocationUnit<<=1;
+ available=m_lastChunkSize=m_chunkAllocationUnit;
+ chunk=m_array[m_lastChunk]=new char[m_lastChunkSize];
+ }
+ else
+ available=m_lastChunkSize=chunk.length;
+
+ m_firstFree=0;
+ }
+ }
+
+ // Adjust the insert point in the last chunk, when we've reached it.
+ m_firstFree+=available;
}
/**
@@ -280,19 +580,201 @@
* this FastStringBuffer, growing the storage if necessary.
* <p>
* NOTE THAT after calling append(), previously obtained
- * references to m_map[] may no longer be valid.
+ * references to m_array[] may no longer be valid.
*
* @param value FastStringBuffer whose contents are
* to be appended.
*/
public final void append(FastStringBuffer value)
{
-
- int length = value.m_firstFree;
+ // Complicating factor here is that the two buffers may use
+ // different chunk sizes, and even if they're the same we're
+ // probably on a different alignment due to previously appended
+ // data. We have to work through the source in bite-sized chunks.
+ int strlen=value.length();
+ int copyfrom=0;
+ char[] chunk=m_array[m_lastChunk];
+ int available=m_lastChunkSize-m_firstFree;
+
+ // Repeat while data remains to be copied
+ while(strlen>0)
+ {
+ // Enlarge chunk if necessary/possible (up to maximum)
+ int newsize=(
+ (m_firstFree+strlen // needed space
++m_chunkAllocationUnit-1) // round up
+ &
+ ~(m_chunkAllocationUnit-1)); // round off
+ if(newsize>m_chunkSize) // Not to exceed maximum.
+ newsize=m_chunkSize;
+
+ if(newsize!=m_lastChunkSize) // We're enlarging!
+ {
+ char[] newchunk=new char[newsize];
+ if(chunk!=null)
+ System.arraycopy(chunk,0,newchunk,0,m_firstFree);
+ chunk=m_array[m_lastChunk]=newchunk;
+ m_lastChunkSize=newsize;
+ available=newsize-m_firstFree;
+ }
+
+ // Copy what fits
+ if(available>strlen) available=strlen;
+
+ int sourcechunk=(copyfrom+value.m_chunkSize-1)>>>value.m_chunkBits;
+ int sourcecolumn=copyfrom & value.m_chunkMask;
+ int runlength=value.m_chunkSize-sourcecolumn;
+ if(runlength>available) runlength=available;
+ System.arraycopy(value.m_array[sourcechunk],sourcecolumn,
+ m_array[m_lastChunk], m_firstFree, runlength);
+ if(runlength!=available)
+ System.arraycopy(value.m_array[sourcechunk+1],0,
+ m_array[m_lastChunk], m_firstFree+runlength,
available-runlength);
+
+ strlen-=available;
+ copyfrom+=available;
+
+ // If there's more left, allocate another chunk and continue
+ if(strlen>0)
+ {
+ // Extend array?
+ int i=m_array.length;
+ if(m_lastChunk+1==i)
+ {
+ char[][] newarray=new char[i+16][];
+ System.arraycopy(m_array,0,newarray,0,i);
+ m_array=newarray;
+ }
+
+ // Advance one chunk
+ chunk=m_array[++m_lastChunk];
+ if(chunk==null)
+ {
+ if(m_chunkAllocationUnit<m_chunkSize)
+ m_chunkAllocationUnit<<=1;
+ available=m_lastChunkSize=m_chunkAllocationUnit;
+ chunk=m_array[m_lastChunk]=new char[m_lastChunkSize];
+ }
+ else
+ available=m_lastChunkSize=chunk.length;
+
+ m_firstFree=0;
+ }
+ }
+
+ // Adjust the insert point in the last chunk, when we've reached it.
+ m_firstFree+=available;
+ }
- ensureFreeSpace(length);
- System.arraycopy(value.m_map, 0, m_map, m_firstFree, length);
+ /** @return true if the specified range of characters are all whitespace,
+ * as defined by XMLCharacterRecognizer.
+ * <p>
+ * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
+ *
+ * @param start Offset of first character in the range.
+ * @param length Number of characters to send.
+ */
+ public boolean isWhitespace(int start, int length)
+ {
+ int sourcechunk=start >>> m_chunkBits;
+ int sourcecolumn=start & m_chunkMask;
+ int available=m_chunkSize-sourcecolumn;
+
+ while(length>0)
+ {
+ int runlength=(length<=available) ? length : available;
+ if(!org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(
+
m_array[sourcechunk],sourcecolumn,runlength))
+ return false;
+
+ length-=runlength;
+ ++sourcechunk;
+ sourcecolumn=0;
+ available=m_chunkSize;
+ }
+
+ return true;
+ }
+
+ /** @return a new String object initialized from the specified range of
+ * characters.
+ * @param start Offset of first character in the range.
+ * @param length Number of characters to send.
+ */
+ public String getString(int start, int length)
+ {
+ return getString(start>>>m_chunkBits,start&m_chunkMask,length);
+ }
+
+ /** Internal support for toString() and getString().
+ *
+ * Note that this operation has been somewhat deoptimized by the shift to a
+ * chunked array, as there is no factory method to produce a String object
+ * directly from an array of arrays and hence a double copy is needed.
+ * By presetting length we hope to minimize the heap overhead of building
+ * the intermediate StringBuffer.
+ * <p>
+ * (It really is a pity that Java didn't design String as a final subclass
+ * of MutableString, rather than having StringBuffer be a separate
hierarchy.
+ * We'd avoid a <strong>lot</strong> of double-buffering.)
+ *
+ * @return the contents of the FastStringBuffer as a standard Java string.
+ */
+ String getString(int startChunk,int startColumn,int length)
+ {
+ int stop=(startChunk<<m_chunkBits)+startColumn+length;
+ int stopChunk=stop>>>m_chunkBits;
+ int stopColumn=stop&m_chunkMask;
+
+ StringBuffer sb=new StringBuffer(length);
+
+ for(int i=startChunk;i<stopChunk;++i)
+ {
+ sb.append(m_array[i],startColumn,m_chunkSize-startColumn);
+ startColumn=0; // after first chunk
+ }
+
+ // Last, or only, chunk
+ sb.append(m_array[stopChunk],startColumn,stopColumn-startColumn);
+
+ return sb.toString();
+ }
+
+ /** Sends the specified range of characters as one or more SAX characters()
+ * events.
+ * Note that the buffer reference passed to the ContentHandler may be
+ * invalidated if the FastStringBuffer is edited; it's the user's
+ * responsibility to manage access to the FastStringBuffer to prevent this
+ * problem from arising.
+ * <p>
+ * Note too that there is no promise that the output will be sent as a
+ * single call. As is always true in SAX, one logical string may be split
+ * across multiple blocks of memory and hence delivered as several
+ * successive events.
+ *
+ * @param ch SAX ContentHandler object to receive the event.
+ * @param start Offset of first character in the range.
+ * @param length Number of characters to send.
+ * @exception org.xml.sax.SAXException may be thrown by handler's
+ * characters() method.
+ */
+ public void sendSAXcharacters(org.xml.sax.ContentHandler ch,int start, int
length)
+ throws org.xml.sax.SAXException
+ {
+ int stop=start+length;
+ int startChunk=start>>>m_chunkBits;
+ int startColumn=start&m_chunkMask;
+ int stopChunk=stop>>>m_chunkBits;
+ int stopColumn=stop&m_chunkMask;
+
+ for(int i=startChunk;i<stopChunk;++i)
+ {
+ ch.characters(m_array[i],startColumn,m_chunkSize-startColumn);
+ startColumn=0; // after first chunk
+ }
- m_firstFree += length;
+ // Last, or only, chunk
+ ch.characters(m_array[stopChunk],startColumn,stopColumn-startColumn);
}
+
}
1.6 +1 -2 xml-xalan/java/src/trax/Attic/trax.properties