mkwan       2003/02/19 08:39:15

  Modified:    java/src/org/apache/xml/dtm/ref Tag: XSLTC_DTM
                        DTMDocumentImpl.java ExpandedNameTable.java
                        ExtendedType.java
  Log:
  XSLTC_DTM performance work
  Revamp the ExpandedNameTable. Instead of using java.util.Hashtable, we
  directly integrate a simple hash algorithm into this class. This is
  proved to be a significant improvement for documents which have many expanded
  names. The get() and put() operations are combined together to share
  the same hash calculation code. There is a new rehash() method which
  is responsible for expanding the hash table. The inner class HashEntry
  represents an entry in the hash table.
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.7.8.3   +1 -1      
xml-xalan/java/src/org/apache/xml/dtm/ref/DTMDocumentImpl.java
  
  Index: DTMDocumentImpl.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xalan/java/src/org/apache/xml/dtm/ref/DTMDocumentImpl.java,v
  retrieving revision 1.7.8.2
  retrieving revision 1.7.8.3
  diff -u -r1.7.8.2 -r1.7.8.3
  --- DTMDocumentImpl.java      30 Jan 2003 18:41:52 -0000      1.7.8.2
  +++ DTMDocumentImpl.java      19 Feb 2003 16:39:14 -0000      1.7.8.3
  @@ -179,7 +179,7 @@
           // retrieve them each time. Or this needs to be
           // an interface _implemented_ by this class... which might be 
simplest!
           private ExpandedNameTable m_expandedNames=
  -                new ExpandedNameTable(m_localNames,m_nsNames);
  +                new ExpandedNameTable();
   
           private XMLStringFactory m_xsf;
   
  
  
  
  1.4.2.9   +154 -77   
xml-xalan/java/src/org/apache/xml/dtm/ref/ExpandedNameTable.java
  
  Index: ExpandedNameTable.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xalan/java/src/org/apache/xml/dtm/ref/ExpandedNameTable.java,v
  retrieving revision 1.4.2.8
  retrieving revision 1.4.2.9
  diff -u -r1.4.2.8 -r1.4.2.9
  --- ExpandedNameTable.java    31 Jan 2003 16:30:17 -0000      1.4.2.8
  +++ ExpandedNameTable.java    19 Feb 2003 16:39:15 -0000      1.4.2.9
  @@ -56,36 +56,29 @@
    */
   package org.apache.xml.dtm.ref;
   
  -import java.util.Hashtable;
  -
   import org.apache.xml.dtm.DTM;
   
   /**
    * This is a default implementation of a table that manages mappings from
    * expanded names to expandedNameIDs.
    *
  - * %REVIEW% Note that this is not really a separate table, or a
  - * separate pool. Instead, it's an access method build on top of the
  - * existing pools, using three pieces of information: the index
  - * numbers for a node's namespaceURI, localName, and node type, which
  - * are combined to yield a composite index number.
  - *
  - * %TBD% startup sequence -- how this gets access to the appropriate
  - * string pools in the DTMDocument/stylesheet.
  - *
  - * */
  + * %OPT% The performance of the getExpandedTypeID() method is very important 
  + * to DTM building. To get the best performance out of this class, we 
implement
  + * a simple hash algorithm directly into this class, instead of using the
  + * inefficient java.util.Hashtable. The code for the get and put operations
  + * are combined in getExpandedTypeID() method to share the same hash 
calculation
  + * code. We only need to implement the rehash() interface which is used to
  + * expand the hash table.
  + */
   public class ExpandedNameTable
   {
   
  -  /** Probably a reference to static pool.     */
  -  //private DTMStringPool m_locNamesPool;
  -
  -  /** Probably a reference to static pool.   */
  -  //private DTMStringPool m_namespaceNames;
  -
     /** Array of extended types for this document   */
  -  private /*static*/ ExtendedType[] m_extendedTypes;
  +  private ExtendedType[] m_extendedTypes;
   
  +  /** The initial size of the m_extendedTypes array */
  +  private static int m_initialSize = 128;
  +  
     /** Next available extended type   */
     // %REVIEW% Since this is (should be) always equal 
     // to the length of m_extendedTypes, do we need this? 
  @@ -106,118 +99,181 @@
     public static final int NOTATION = ((int)DTM.NOTATION_NODE) ;
     public static final int NAMESPACE = ((int)DTM.NAMESPACE_NODE) ;
   
  -  Hashtable m_hashtable = new Hashtable();
  -
     /** Workspace for lookup. NOT THREAD SAFE!
      * */
  -  ExtendedType hashET=new ExtendedType(-1,"","");
  +  ExtendedType hashET = new ExtendedType(-1, "", "");
   
  -  private static Hashtable m_defaultHashtable;
  +  /** The array to store the default extended types. */
     private static ExtendedType[] m_defaultExtendedTypes;
   
     /**
  -   *  Init default vales
  +   * The default load factor of the Hashtable.
  +   * This is used to calcualte the threshold.
  +   */
  +  private static float m_loadFactor = 0.75f;
  +    
  +  /**
  +   * The initial capacity of the hash table. Use a bigger number
  +   * to avoid the cost of expanding the table.
  +   */ 
  +  private static int m_initialCapacity = 203;
  +  
  +  /**
  +   * The capacity of the hash table, i.e. the size of the
  +   * internal HashEntry array.
  +   */ 
  +  private int m_capacity;
  +  
  +  /** 
  +   * The threshold of the hash table, which is equal to capacity * 
loadFactor.
  +   * If the number of entries in the hash table is bigger than the threshold,
  +   * the hash table needs to be expanded.
  +   */
  +  private int m_threshold;
  +  
  +  /**
  +   * The internal array to store the hash entries.
  +   * Each array member is a slot for a hash bucket.
  +   */
  +  private HashEntry[] m_table;
  +
  +  /**
  +   * Init default values
      */
     static {
  -    // use bigger values than default, to avoid reallocation in the future
  -    m_defaultExtendedTypes = new ExtendedType[23];
  -    m_defaultHashtable = new Hashtable(23, 0.75f);
  +    m_defaultExtendedTypes = new ExtendedType[DTM.NTYPES];
   
       for (int i = 0; i < DTM.NTYPES; i++)
       {
  -      ExtendedType newET = new ExtendedType(i, "", "");
  -      m_defaultExtendedTypes[i] = newET;
  -      m_defaultHashtable.put(newET, new Integer(i));
  +      m_defaultExtendedTypes[i] = new ExtendedType(i, "", "");
       }
     }
   
     /**
  -   * Create an expanded name table that uses private string pool lookup.
  +   * Create an expanded name table.
      */
     public ExpandedNameTable()
     {
  -    //m_locNamesPool = new DTMSafeStringPool();
  -    //m_namespaceNames = new DTMSafeStringPool();
  +    m_capacity = m_initialCapacity;
  +    m_threshold = (int)(m_capacity * m_loadFactor);
  +    m_table = new HashEntry[m_capacity];
  +    
       initExtendedTypes();
     }
   
  -  /**
  -   * Constructor ExpandedNameTable
  -   *
  -   * @param locNamesPool Local element names lookup.
  -   * @param namespaceNames Namespace values lookup.
  -   */
  -  public ExpandedNameTable(DTMStringPool locNamesPool,
  -                           DTMStringPool namespaceNames)
  -  {
  -    //m_locNamesPool = locNamesPool;
  -    //m_namespaceNames = namespaceNames;
  -    initExtendedTypes();
  -  }
   
     /**
      *  Initialize the vector of extended types with the
      *  basic DOM node types.
      */
     private void initExtendedTypes()
  -  {
  -    // Since objects in m_extendedTypes and m_hashtable are never changed
  -    // it should be safe to copy default tables
  -    m_extendedTypes = new ExtendedType[m_defaultExtendedTypes.length];
  +  {    
  +    m_extendedTypes = new ExtendedType[m_initialSize];
       for (int i = 0; i < DTM.NTYPES; i++) {
           m_extendedTypes[i] = m_defaultExtendedTypes[i];
  +        m_table[i] = new HashEntry(m_defaultExtendedTypes[i], i, i, null);
       }
  -    m_hashtable = (Hashtable)m_defaultHashtable.clone();
  +    
       m_nextType = DTM.NTYPES;
     }
   
     /**
  -   * Given an expanded name, return an ID.  If the expanded-name does not
  -   * exist in the internal tables, the entry will be created, and the ID will
  -   * be returned.  Any additional nodes that are created that have this
  -   * expanded name will use this ID.
  -   *
  -   * @param namespace
  -   * @param localName
  +   * Given an expanded name represented by namespace, local name and node 
type,
  +   * return an ID.  If the expanded-name does not exist in the internal 
tables,
  +   * the entry will be created, and the ID will be returned.  Any additional 
  +   * nodes that are created that have this expanded name will use this ID.
  +   *
  +   * @param namespace The namespace
  +   * @param localName The local name
  +   * @param type The node type
      *
      * @return the expanded-name id of the node.
      */
     public int getExpandedTypeID(String namespace, String localName, int type)
     {
  -    /*int nsID = (null != namespace) ? 
m_namespaceNames.stringToIndex(namespace) : 0;
  -    int lnID = m_locNamesPool.stringToIndex(localName);
  -
  -    int expandedTypeID = (type << (BITS_PER_NAMESPACE+BITS_PER_LOCALNAME))
  -                       | (nsID << BITS_PER_LOCALNAME) | lnID;
  -    return expandedTypeID;
  -*/
       if (null == namespace)
         namespace = "";
       if (null == localName)
         localName = "";
  -    // Set our reusable ExpandedType so we can look
  -    // it up in the hashtable. Not threadsafe, but
  -    // avoids creating a new object until we know
  -    // this isn't one we've seen before.
  -    hashET.redefine(type,namespace,localName);
  -
  -    Object eType;
  -    if ((eType = m_hashtable.get(hashET)) != null )
  -      return ((Integer)eType).intValue();
  +    
  +    // Calculate the hash code
  +    int hash = type + namespace.hashCode() + localName.hashCode();
  +    
  +    // Redefine the hashET object to represent the new expanded name.
  +    hashET.redefine(type, namespace, localName, hash);
  +    
  +    // Calculate the index into the HashEntry table.
  +    int index = hash % m_capacity;
  +    if (index < 0)
  +      index = -index;
  +
  +    // Look up the expanded name in the hash table. Return the id if
  +    // the expanded name is already in the hash table.
  +    for (HashEntry e = m_table[index]; e != null; e = e.next)
  +    {
  +      if (e.hash == hash && e.key.equals(hashET))
  +        return e.value;
  +    }
   
  -    ExtendedType newET=new ExtendedType(type, namespace, localName);
  +    // Expand the internal HashEntry array if necessary.
  +    if (m_nextType > m_threshold)
  +      rehash();
  +    
  +    // Create a new ExtendedType object
  +    ExtendedType newET = new ExtendedType(type, namespace, localName, hash);
  +    
  +    // Expand the m_extendedTypes array if necessary.
       if (m_extendedTypes.length == m_nextType) {
  -        ExtendedType[] newArray = new ExtendedType[m_extendedTypes.length*2];
  +        ExtendedType[] newArray = new ExtendedType[m_extendedTypes.length * 
2];
           System.arraycopy(m_extendedTypes, 0, newArray, 0,
                            m_extendedTypes.length);
           m_extendedTypes = newArray;
       }
  +    
       m_extendedTypes[m_nextType] = newET;
  -    m_hashtable.put(newET, new Integer(m_nextType));
  +    
  +    // Create a new hash entry for the new ExtendedType and put it into 
  +    // the table.
  +    HashEntry entry = new HashEntry(newET, m_nextType, hash, m_table[index]);
  +    m_table[index] = entry;
  +
       return m_nextType++;
     }
   
     /**
  +   * Increases the capacity of and internally reorganizes the hashtable, 
  +   * in order to accommodate and access its entries more efficiently. 
  +   * This method is called when the number of keys in the hashtable exceeds
  +   * this hashtable's capacity and load factor.
  +   */
  +  private void rehash()
  +  {
  +    int oldCapacity = m_capacity;
  +    HashEntry[] oldTable = m_table;
  +      
  +    int newCapacity = 2 * oldCapacity + 1;
  +    m_capacity = newCapacity;
  +    m_threshold = (int)(newCapacity * m_loadFactor);
  +      
  +    m_table = new HashEntry[newCapacity];
  +    for (int i = oldCapacity-1; i >=0 ; i--)
  +    {
  +      for (HashEntry old = oldTable[i]; old != null; )
  +      {
  +        HashEntry e = old;
  +        old = old.next;
  +          
  +        int newIndex = e.hash % newCapacity;
  +        if (newIndex < 0)
  +          newIndex = -newIndex;
  +          
  +        e.next = m_table[newIndex];
  +        m_table[newIndex] = e;
  +      }
  +    }
  +  }
  +
  +  /**
      * Given a type, return an expanded name ID.Any additional nodes that are
      * created that have this expanded name will use this ID.
      *
  @@ -316,6 +372,27 @@
     public ExtendedType[] getExtendedTypes()
     {
       return m_extendedTypes;
  +  }
  +
  +  /**
  +   * Inner class which represents a hash table entry.
  +   * The field next points to the next entry which is hashed into
  +   * the same bucket in the case of "hash collision".
  +   */
  +  private static final class HashEntry
  +  {
  +    ExtendedType key;
  +    int value;
  +    int hash;
  +    HashEntry next;
  +      
  +    protected HashEntry(ExtendedType key, int value, int hash, HashEntry 
next)
  +    {
  +      this.key = key;
  +      this.value = value;
  +      this.hash = hash;
  +      this.next = next;
  +    }
     }
     
   }
  
  
  
  1.1.2.2   +61 -25    
xml-xalan/java/src/org/apache/xml/dtm/ref/Attic/ExtendedType.java
  
  Index: ExtendedType.java
  ===================================================================
  RCS file: 
/home/cvs/xml-xalan/java/src/org/apache/xml/dtm/ref/Attic/ExtendedType.java,v
  retrieving revision 1.1.2.1
  retrieving revision 1.1.2.2
  diff -u -r1.1.2.1 -r1.1.2.2
  --- ExtendedType.java 31 Jan 2003 16:30:17 -0000      1.1.2.1
  +++ ExtendedType.java 19 Feb 2003 16:39:15 -0000      1.1.2.2
  @@ -67,54 +67,90 @@
       private String localName;
       private int hash;
   
  +    /**
  +     * Create an ExtendedType object from node type, namespace and local 
name.
  +     * The hash code is calculated from the node type, namespace and local 
name.
  +     * 
  +     * @param nodetype Type of the node
  +     * @param namespace Namespace of the node
  +     * @param localName Local name of the node
  +     */
       public ExtendedType (int nodetype, String namespace, String localName)
       {
         this.nodetype = nodetype;
         this.namespace = namespace;
         this.localName = localName;
  -      this.hash=nodetype+namespace.hashCode()+localName.hashCode();
  +      this.hash = nodetype + namespace.hashCode() + localName.hashCode();
       }
   
  -     /* This is intended to be used ONLY on the hashET
  -      * object. Using it elsewhere will mess up existing
  -      * hashtable entries!
  -      * */
  +    /**
  +     * Create an ExtendedType object from node type, namespace, local name
  +     * and a given hash code.
  +     * 
  +     * @param nodetype Type of the node
  +     * @param namespace Namespace of the node
  +     * @param localName Local name of the node
  +     * @param hash The given hash code
  +     */
  +    public ExtendedType (int nodetype, String namespace, String localName, 
int hash)
  +    {
  +      this.nodetype = nodetype;
  +      this.namespace = namespace;
  +      this.localName = localName;
  +      this.hash = hash;
  +    }
  +
  +    /** 
  +     * Redefine this ExtendedType object to represent a different extended 
type.
  +     * This is intended to be used ONLY on the hashET object. Using it 
elsewhere
  +     * will mess up existing hashtable entries!
  +     */
       protected void redefine(int nodetype, String namespace, String localName)
       {
         this.nodetype = nodetype;
         this.namespace = namespace;
         this.localName = localName;
  -      this.hash=nodetype+namespace.hashCode()+localName.hashCode();
  +      this.hash = nodetype + namespace.hashCode() + localName.hashCode();
       }
   
  -    /* Override super method
  -      * */
  +    /** 
  +     * Redefine this ExtendedType object to represent a different extended 
type.
  +     * This is intended to be used ONLY on the hashET object. Using it 
elsewhere
  +     * will mess up existing hashtable entries!
  +     */
  +    protected void redefine(int nodetype, String namespace, String 
localName, int hash)
  +    {
  +      this.nodetype = nodetype;
  +      this.namespace = namespace;
  +      this.localName = localName;
  +      this.hash = hash;
  +    }
  +
  +    /**
  +     * Override the hashCode() method in the Object class
  +     */
       public int hashCode()
       {
  -     return hash;
  +      return hash;
       }
   
  -    /* Override super method
  -      * */
  -    public boolean equals(Object other)
  -    {
  -      //Usually an optimization, but
  -      // won't arise in our usage:
  -      //if(other==this) return true;
  +    /**
  +     * Test if this ExtendedType object is equal to the given ExtendedType.
  +     * 
  +     * @param other The other ExtendedType object to test for equality
  +     * @return true if the two ExtendedType objects are equal.
  +     */
  +    public boolean equals(ExtendedType other)
  +    {
         try
         {
  -          ExtendedType et=(ExtendedType)other;
  -          return et.nodetype==this.nodetype &&
  -                  et.localName.equals(this.localName) &&
  -                  et.namespace.equals(this.namespace);
  -      }
  -      catch(ClassCastException e)
  -      {
  -              return false;
  +        return other.nodetype == this.nodetype &&
  +                other.localName.equals(this.localName) &&
  +                other.namespace.equals(this.namespace);
         }
         catch(NullPointerException e)
         {
  -              return false;
  +        return false;
         }
       }
       
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to