minchau 2003/06/08 23:32:11
Modified: java/src/org/apache/xml/serializer ToStream.java
Log:
Speed up the ToStream.characters(char[], int, int) method by
caching a commonly calculated boolean expression for characters in the range
(0-126)
in an array in CharInfo.
Also move the whitespace check out of the loop that processes characters and
into
a separate loop. Most of the time this whitespace-checking loop will end
early and
speed up the other loop, which no longer checks for whitespace on every
character.
Submitted by: Brian Minchau
Revision Changes Path
1.9 +126 -75
xml-xalan/java/src/org/apache/xml/serializer/ToStream.java
Index: ToStream.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xml/serializer/ToStream.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- ToStream.java 30 May 2003 07:08:43 -0000 1.8
+++ ToStream.java 9 Jun 2003 06:32:11 -0000 1.9
@@ -64,8 +64,6 @@
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
-import java.util.BitSet;
-import java.util.Hashtable;
import java.util.Properties;
import java.util.StringTokenizer;
import java.util.Vector;
@@ -78,7 +76,6 @@
import org.apache.xml.utils.BoolStack;
import org.apache.xml.utils.FastStringBuffer;
import org.apache.xml.utils.QName;
-import org.apache.xml.utils.SystemIDResolver;
import org.apache.xml.utils.TreeWalker;
import org.apache.xml.utils.WrappedRuntimeException;
import org.w3c.dom.Node;
@@ -89,12 +86,9 @@
//import com.sun.media.sound.IESecurity;
/**
- * This abstract class is a base class for other serializers (xml, html, text
- * ...) that write output to a stream.
- * @author minchau
- *
+ * This abstract class is a base class for other stream
+ * serializers (xml, html, text ...) that write output to a stream.
*/
-
abstract public class ToStream extends SerializerBase
{
@@ -883,6 +877,14 @@
throws SAXException
{
}
+
+ static boolean isWhitespace(char ch)
+ {
+ if (ch == 0x20 || ch == 0x0A || ch == 0x0D || ch == 0x09 )
+ return true;
+ else
+ return false;
+ }
/**
* Tell if this character can be written without escaping.
@@ -1376,7 +1378,7 @@
*
* @throws org.xml.sax.SAXException
*/
- public void characters(char chars[], int start, int length)
+ public void characters(final char chars[], final int start, final int
length)
throws org.xml.sax.SAXException
{
if (0 == length)
@@ -1428,85 +1430,89 @@
closeStartTag();
m_startTagOpen = false;
}
-
-
- int startClean = start;
- int lengthClean = 0;
- // int pos = 0;
- int end = start + length;
- boolean checkWhite = true;
- final int maxCharacter = m_maxCharacter;
- final BitSet specialsMap = m_charInfo.m_specialsMap;
+
try
{
- for (int i = start; i < end; i++)
- {
- char ch = chars[i];
-
- if (checkWhite
- && ((ch > 0x20)
- || !((ch == 0x20)
- || (ch == 0x09)
- || (ch == 0xD)
- || (ch == 0xA))))
- {
- m_ispreserve = true;
- checkWhite = false;
- }
-
- // The first if(...) has the most common part of
escapingNotNeeded()
- // inlined to save the call. If the expression is false it
will
- // fall back to the next else if(...) which does the real
thing
- // with esacapingNotNeeded()
- if ((((ch < 127)
- && (0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 ==
ch)))
- && (!specialsMap.get(ch)))
- || ('"' == ch))
+ int i;
+ char ch1;
+ int startClean;
+
+ // skip any leading whitspace
+ // don't go off the end and use a hand inlined version
+ // of isWhitespace(ch)
+ final int end = start + length;
+ int lastDirty = start - 1; // last character that needed
processing
+ for (i = start;
+ ((i < end)
+ && ((ch1 = chars[i]) == 0x20
+ || ch1 == 0xA
+ || ch1 == 0xD
+ || ch1 == 0x09));
+ i++)
+ {
+ /*
+ * We are processing leading whitespace, but are doing the
same
+ * processing for dirty characters here as for
non-whitespace.
+ *
+ */
+ if (!m_charInfo.isASCIIClean(ch1))
{
- lengthClean++;
+ lastDirty = processDirty(chars,end, i,ch1, lastDirty);
+ i = lastDirty;
}
- else if (
- (escapingNotNeeded(ch) && (!specialsMap.get(ch)))
+ }
+ /* If there is some non-whitespace, mark that we may need
+ * to preserve this. This is only important if we have
indentation on.
+ */
+ if (i < end)
+ m_ispreserve = true;
+
+
+// int lengthClean; // number of clean characters in a row
+// final boolean[] isAsciiClean = m_charInfo.getASCIIClean();
+
+ // we've skipped the leading whitespace, now deal with the rest
+ for (; i < end; i++)
+ {
+ {
+ // A tight loop to skip over common clean chars
+ // This tight loop makes it easier for the JIT
+ // to optimize.
+ char ch2;
+ while (i<end
+ && ((ch2 = chars[i])<127)
+ && m_charInfo.isASCIIClean(ch2))
+ i++;
+ if (i == end)
+ break;
+ }
+
+ final char ch = chars[i];
+ if (
+
+ (escapingNotNeeded(ch) && (!m_charInfo.isSpecial(ch)))
|| ('"' == ch))
{
- lengthClean++;
+ ; // a character needing no special processing
}
else
{
- if (lengthClean > 0)
- {
- m_writer.write(chars, startClean, lengthClean);
-
- lengthClean = 0;
- }
-
- if (CharInfo.S_LINEFEED == ch)
- {
- m_writer.write(m_lineSep, 0, m_lineSepLen);
-
- startClean = i + 1;
- }
- else
- {
- startClean =
- accumDefaultEscape(
- m_writer,
- ch,
- i,
- chars,
- end,
- false);
- i = startClean - 1;
- }
+ lastDirty = processDirty(chars,end, i, ch, lastDirty);
+ i = lastDirty;
}
}
-
- if (lengthClean > 0)
+
+ // we've reached the end. Any clean characters at the
+ // end of the array than need to be written out?
+ startClean = lastDirty + 1;
+ if (i > startClean)
{
+ int lengthClean = i - startClean;
m_writer.write(chars, startClean, lengthClean);
}
+ // For indentation purposes, mark that we've just writen text out
m_isprevtext = true;
}
catch (IOException e)
@@ -1518,6 +1524,54 @@
if (m_tracer != null)
super.fireCharEvent(chars, start, length);
}
+ /**
+ * Process a dirty character and any preeceding clean characters
+ * that were not yet processed.
+ * @param chars array of characters being processed
+ * @param end one (1) beyond the last character
+ * in chars to be processed
+ * @param i the index of the dirty character
+ * @param ch the character in chars[i]
+ * @param lastDirty the last dirty character previous to i
+ * @return the index of the last character processed
+ */
+ private int processDirty(
+ char[] chars,
+ int end,
+ int i,
+ char ch,
+ int lastDirty) throws IOException
+ {
+ int startClean = lastDirty + 1;
+ // if we have some clean characters accumulated
+ // process them before the dirty one.
+ if (i > startClean)
+ {
+ int lengthClean = i - startClean;
+ m_writer.write(chars, startClean, lengthClean);
+ }
+
+ // process the "dirty" character
+ if (CharInfo.S_LINEFEED == ch)
+ {
+ m_writer.write(m_lineSep, 0, m_lineSepLen);
+ }
+ else
+ {
+ startClean =
+ accumDefaultEscape(
+ m_writer,
+ (char)ch,
+ i,
+ chars,
+ end,
+ false);
+ i = startClean - 1;
+ }
+ // Return the index of the last character that we just processed
+ // which is a dirty character.
+ return i;
+ }
/**
* Receive notification of character data.
@@ -2757,9 +2811,6 @@
* written by the method writeAttrString() into a string buffer.
* In this manner trace events, and the real writing of attributes will
use
* the same code.
- *
- * @author minchau
- *
*/
private class WritertoStringBuffer extends java.io.Writer
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]