mkwan 2003/02/19 08:39:15
Modified: java/src/org/apache/xml/dtm/ref Tag: XSLTC_DTM
DTMDocumentImpl.java ExpandedNameTable.java
ExtendedType.java
Log:
XSLTC_DTM performance work
Revamp the ExpandedNameTable. Instead of using java.util.Hashtable, we
directly integrate a simple hash algorithm into this class. This is
proved to be a significant improvement for documents which have many expanded
names. The get() and put() operations are combined together to share
the same hash calculation code. There is a new rehash() method which
is responsible for expanding the hash table. The inner class HashEntry
represents an entry in the hash table.
Revision Changes Path
No revision
No revision
1.7.8.3 +1 -1
xml-xalan/java/src/org/apache/xml/dtm/ref/DTMDocumentImpl.java
Index: DTMDocumentImpl.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xml/dtm/ref/DTMDocumentImpl.java,v
retrieving revision 1.7.8.2
retrieving revision 1.7.8.3
diff -u -r1.7.8.2 -r1.7.8.3
--- DTMDocumentImpl.java 30 Jan 2003 18:41:52 -0000 1.7.8.2
+++ DTMDocumentImpl.java 19 Feb 2003 16:39:14 -0000 1.7.8.3
@@ -179,7 +179,7 @@
// retrieve them each time. Or this needs to be
// an interface _implemented_ by this class... which might be
simplest!
private ExpandedNameTable m_expandedNames=
- new ExpandedNameTable(m_localNames,m_nsNames);
+ new ExpandedNameTable();
private XMLStringFactory m_xsf;
1.4.2.9 +154 -77
xml-xalan/java/src/org/apache/xml/dtm/ref/ExpandedNameTable.java
Index: ExpandedNameTable.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xml/dtm/ref/ExpandedNameTable.java,v
retrieving revision 1.4.2.8
retrieving revision 1.4.2.9
diff -u -r1.4.2.8 -r1.4.2.9
--- ExpandedNameTable.java 31 Jan 2003 16:30:17 -0000 1.4.2.8
+++ ExpandedNameTable.java 19 Feb 2003 16:39:15 -0000 1.4.2.9
@@ -56,36 +56,29 @@
*/
package org.apache.xml.dtm.ref;
-import java.util.Hashtable;
-
import org.apache.xml.dtm.DTM;
/**
* This is a default implementation of a table that manages mappings from
* expanded names to expandedNameIDs.
*
- * %REVIEW% Note that this is not really a separate table, or a
- * separate pool. Instead, it's an access method build on top of the
- * existing pools, using three pieces of information: the index
- * numbers for a node's namespaceURI, localName, and node type, which
- * are combined to yield a composite index number.
- *
- * %TBD% startup sequence -- how this gets access to the appropriate
- * string pools in the DTMDocument/stylesheet.
- *
- * */
+ * %OPT% The performance of the getExpandedTypeID() method is very important
+ * to DTM building. To get the best performance out of this class, we
implement
+ * a simple hash algorithm directly into this class, instead of using the
+ * inefficient java.util.Hashtable. The code for the get and put operations
+ * are combined in getExpandedTypeID() method to share the same hash
calculation
+ * code. We only need to implement the rehash() interface which is used to
+ * expand the hash table.
+ */
public class ExpandedNameTable
{
- /** Probably a reference to static pool. */
- //private DTMStringPool m_locNamesPool;
-
- /** Probably a reference to static pool. */
- //private DTMStringPool m_namespaceNames;
-
/** Array of extended types for this document */
- private /*static*/ ExtendedType[] m_extendedTypes;
+ private ExtendedType[] m_extendedTypes;
+ /** The initial size of the m_extendedTypes array */
+ private static int m_initialSize = 128;
+
/** Next available extended type */
// %REVIEW% Since this is (should be) always equal
// to the length of m_extendedTypes, do we need this?
@@ -106,118 +99,181 @@
public static final int NOTATION = ((int)DTM.NOTATION_NODE) ;
public static final int NAMESPACE = ((int)DTM.NAMESPACE_NODE) ;
- Hashtable m_hashtable = new Hashtable();
-
/** Workspace for lookup. NOT THREAD SAFE!
* */
- ExtendedType hashET=new ExtendedType(-1,"","");
+ ExtendedType hashET = new ExtendedType(-1, "", "");
- private static Hashtable m_defaultHashtable;
+ /** The array to store the default extended types. */
private static ExtendedType[] m_defaultExtendedTypes;
/**
- * Init default vales
+ * The default load factor of the Hashtable.
+ * This is used to calcualte the threshold.
+ */
+ private static float m_loadFactor = 0.75f;
+
+ /**
+ * The initial capacity of the hash table. Use a bigger number
+ * to avoid the cost of expanding the table.
+ */
+ private static int m_initialCapacity = 203;
+
+ /**
+ * The capacity of the hash table, i.e. the size of the
+ * internal HashEntry array.
+ */
+ private int m_capacity;
+
+ /**
+ * The threshold of the hash table, which is equal to capacity *
loadFactor.
+ * If the number of entries in the hash table is bigger than the threshold,
+ * the hash table needs to be expanded.
+ */
+ private int m_threshold;
+
+ /**
+ * The internal array to store the hash entries.
+ * Each array member is a slot for a hash bucket.
+ */
+ private HashEntry[] m_table;
+
+ /**
+ * Init default values
*/
static {
- // use bigger values than default, to avoid reallocation in the future
- m_defaultExtendedTypes = new ExtendedType[23];
- m_defaultHashtable = new Hashtable(23, 0.75f);
+ m_defaultExtendedTypes = new ExtendedType[DTM.NTYPES];
for (int i = 0; i < DTM.NTYPES; i++)
{
- ExtendedType newET = new ExtendedType(i, "", "");
- m_defaultExtendedTypes[i] = newET;
- m_defaultHashtable.put(newET, new Integer(i));
+ m_defaultExtendedTypes[i] = new ExtendedType(i, "", "");
}
}
/**
- * Create an expanded name table that uses private string pool lookup.
+ * Create an expanded name table.
*/
public ExpandedNameTable()
{
- //m_locNamesPool = new DTMSafeStringPool();
- //m_namespaceNames = new DTMSafeStringPool();
+ m_capacity = m_initialCapacity;
+ m_threshold = (int)(m_capacity * m_loadFactor);
+ m_table = new HashEntry[m_capacity];
+
initExtendedTypes();
}
- /**
- * Constructor ExpandedNameTable
- *
- * @param locNamesPool Local element names lookup.
- * @param namespaceNames Namespace values lookup.
- */
- public ExpandedNameTable(DTMStringPool locNamesPool,
- DTMStringPool namespaceNames)
- {
- //m_locNamesPool = locNamesPool;
- //m_namespaceNames = namespaceNames;
- initExtendedTypes();
- }
/**
* Initialize the vector of extended types with the
* basic DOM node types.
*/
private void initExtendedTypes()
- {
- // Since objects in m_extendedTypes and m_hashtable are never changed
- // it should be safe to copy default tables
- m_extendedTypes = new ExtendedType[m_defaultExtendedTypes.length];
+ {
+ m_extendedTypes = new ExtendedType[m_initialSize];
for (int i = 0; i < DTM.NTYPES; i++) {
m_extendedTypes[i] = m_defaultExtendedTypes[i];
+ m_table[i] = new HashEntry(m_defaultExtendedTypes[i], i, i, null);
}
- m_hashtable = (Hashtable)m_defaultHashtable.clone();
+
m_nextType = DTM.NTYPES;
}
/**
- * Given an expanded name, return an ID. If the expanded-name does not
- * exist in the internal tables, the entry will be created, and the ID will
- * be returned. Any additional nodes that are created that have this
- * expanded name will use this ID.
- *
- * @param namespace
- * @param localName
+ * Given an expanded name represented by namespace, local name and node
type,
+ * return an ID. If the expanded-name does not exist in the internal
tables,
+ * the entry will be created, and the ID will be returned. Any additional
+ * nodes that are created that have this expanded name will use this ID.
+ *
+ * @param namespace The namespace
+ * @param localName The local name
+ * @param type The node type
*
* @return the expanded-name id of the node.
*/
public int getExpandedTypeID(String namespace, String localName, int type)
{
- /*int nsID = (null != namespace) ?
m_namespaceNames.stringToIndex(namespace) : 0;
- int lnID = m_locNamesPool.stringToIndex(localName);
-
- int expandedTypeID = (type << (BITS_PER_NAMESPACE+BITS_PER_LOCALNAME))
- | (nsID << BITS_PER_LOCALNAME) | lnID;
- return expandedTypeID;
-*/
if (null == namespace)
namespace = "";
if (null == localName)
localName = "";
- // Set our reusable ExpandedType so we can look
- // it up in the hashtable. Not threadsafe, but
- // avoids creating a new object until we know
- // this isn't one we've seen before.
- hashET.redefine(type,namespace,localName);
-
- Object eType;
- if ((eType = m_hashtable.get(hashET)) != null )
- return ((Integer)eType).intValue();
+
+ // Calculate the hash code
+ int hash = type + namespace.hashCode() + localName.hashCode();
+
+ // Redefine the hashET object to represent the new expanded name.
+ hashET.redefine(type, namespace, localName, hash);
+
+ // Calculate the index into the HashEntry table.
+ int index = hash % m_capacity;
+ if (index < 0)
+ index = -index;
+
+ // Look up the expanded name in the hash table. Return the id if
+ // the expanded name is already in the hash table.
+ for (HashEntry e = m_table[index]; e != null; e = e.next)
+ {
+ if (e.hash == hash && e.key.equals(hashET))
+ return e.value;
+ }
- ExtendedType newET=new ExtendedType(type, namespace, localName);
+ // Expand the internal HashEntry array if necessary.
+ if (m_nextType > m_threshold)
+ rehash();
+
+ // Create a new ExtendedType object
+ ExtendedType newET = new ExtendedType(type, namespace, localName, hash);
+
+ // Expand the m_extendedTypes array if necessary.
if (m_extendedTypes.length == m_nextType) {
- ExtendedType[] newArray = new ExtendedType[m_extendedTypes.length*2];
+ ExtendedType[] newArray = new ExtendedType[m_extendedTypes.length *
2];
System.arraycopy(m_extendedTypes, 0, newArray, 0,
m_extendedTypes.length);
m_extendedTypes = newArray;
}
+
m_extendedTypes[m_nextType] = newET;
- m_hashtable.put(newET, new Integer(m_nextType));
+
+ // Create a new hash entry for the new ExtendedType and put it into
+ // the table.
+ HashEntry entry = new HashEntry(newET, m_nextType, hash, m_table[index]);
+ m_table[index] = entry;
+
return m_nextType++;
}
/**
+ * Increases the capacity of and internally reorganizes the hashtable,
+ * in order to accommodate and access its entries more efficiently.
+ * This method is called when the number of keys in the hashtable exceeds
+ * this hashtable's capacity and load factor.
+ */
+ private void rehash()
+ {
+ int oldCapacity = m_capacity;
+ HashEntry[] oldTable = m_table;
+
+ int newCapacity = 2 * oldCapacity + 1;
+ m_capacity = newCapacity;
+ m_threshold = (int)(newCapacity * m_loadFactor);
+
+ m_table = new HashEntry[newCapacity];
+ for (int i = oldCapacity-1; i >=0 ; i--)
+ {
+ for (HashEntry old = oldTable[i]; old != null; )
+ {
+ HashEntry e = old;
+ old = old.next;
+
+ int newIndex = e.hash % newCapacity;
+ if (newIndex < 0)
+ newIndex = -newIndex;
+
+ e.next = m_table[newIndex];
+ m_table[newIndex] = e;
+ }
+ }
+ }
+
+ /**
* Given a type, return an expanded name ID.Any additional nodes that are
* created that have this expanded name will use this ID.
*
@@ -316,6 +372,27 @@
public ExtendedType[] getExtendedTypes()
{
return m_extendedTypes;
+ }
+
+ /**
+ * Inner class which represents a hash table entry.
+ * The field next points to the next entry which is hashed into
+ * the same bucket in the case of "hash collision".
+ */
+ private static final class HashEntry
+ {
+ ExtendedType key;
+ int value;
+ int hash;
+ HashEntry next;
+
+ protected HashEntry(ExtendedType key, int value, int hash, HashEntry
next)
+ {
+ this.key = key;
+ this.value = value;
+ this.hash = hash;
+ this.next = next;
+ }
}
}
1.1.2.2 +61 -25
xml-xalan/java/src/org/apache/xml/dtm/ref/Attic/ExtendedType.java
Index: ExtendedType.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xml/dtm/ref/Attic/ExtendedType.java,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -r1.1.2.1 -r1.1.2.2
--- ExtendedType.java 31 Jan 2003 16:30:17 -0000 1.1.2.1
+++ ExtendedType.java 19 Feb 2003 16:39:15 -0000 1.1.2.2
@@ -67,54 +67,90 @@
private String localName;
private int hash;
+ /**
+ * Create an ExtendedType object from node type, namespace and local
name.
+ * The hash code is calculated from the node type, namespace and local
name.
+ *
+ * @param nodetype Type of the node
+ * @param namespace Namespace of the node
+ * @param localName Local name of the node
+ */
public ExtendedType (int nodetype, String namespace, String localName)
{
this.nodetype = nodetype;
this.namespace = namespace;
this.localName = localName;
- this.hash=nodetype+namespace.hashCode()+localName.hashCode();
+ this.hash = nodetype + namespace.hashCode() + localName.hashCode();
}
- /* This is intended to be used ONLY on the hashET
- * object. Using it elsewhere will mess up existing
- * hashtable entries!
- * */
+ /**
+ * Create an ExtendedType object from node type, namespace, local name
+ * and a given hash code.
+ *
+ * @param nodetype Type of the node
+ * @param namespace Namespace of the node
+ * @param localName Local name of the node
+ * @param hash The given hash code
+ */
+ public ExtendedType (int nodetype, String namespace, String localName,
int hash)
+ {
+ this.nodetype = nodetype;
+ this.namespace = namespace;
+ this.localName = localName;
+ this.hash = hash;
+ }
+
+ /**
+ * Redefine this ExtendedType object to represent a different extended
type.
+ * This is intended to be used ONLY on the hashET object. Using it
elsewhere
+ * will mess up existing hashtable entries!
+ */
protected void redefine(int nodetype, String namespace, String localName)
{
this.nodetype = nodetype;
this.namespace = namespace;
this.localName = localName;
- this.hash=nodetype+namespace.hashCode()+localName.hashCode();
+ this.hash = nodetype + namespace.hashCode() + localName.hashCode();
}
- /* Override super method
- * */
+ /**
+ * Redefine this ExtendedType object to represent a different extended
type.
+ * This is intended to be used ONLY on the hashET object. Using it
elsewhere
+ * will mess up existing hashtable entries!
+ */
+ protected void redefine(int nodetype, String namespace, String
localName, int hash)
+ {
+ this.nodetype = nodetype;
+ this.namespace = namespace;
+ this.localName = localName;
+ this.hash = hash;
+ }
+
+ /**
+ * Override the hashCode() method in the Object class
+ */
public int hashCode()
{
- return hash;
+ return hash;
}
- /* Override super method
- * */
- public boolean equals(Object other)
- {
- //Usually an optimization, but
- // won't arise in our usage:
- //if(other==this) return true;
+ /**
+ * Test if this ExtendedType object is equal to the given ExtendedType.
+ *
+ * @param other The other ExtendedType object to test for equality
+ * @return true if the two ExtendedType objects are equal.
+ */
+ public boolean equals(ExtendedType other)
+ {
try
{
- ExtendedType et=(ExtendedType)other;
- return et.nodetype==this.nodetype &&
- et.localName.equals(this.localName) &&
- et.namespace.equals(this.namespace);
- }
- catch(ClassCastException e)
- {
- return false;
+ return other.nodetype == this.nodetype &&
+ other.localName.equals(this.localName) &&
+ other.namespace.equals(this.namespace);
}
catch(NullPointerException e)
{
- return false;
+ return false;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]