Author: mrglavas
Date: Wed Jul 4 19:56:10 2012
New Revision: 1357381
URL: http://svn.apache.org/viewvc?rev=1357381&view=rev
Log:
Enhancements to the hashing algorithms used in internal data structures within
Xerces to make them more resistant to collisions. To improve the distribution a
new hash function is randomly selected each time the number of items in a
bucket exceeds a threshold.
Added:
xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
(with props)
Modified:
xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java
xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java
xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java
xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java
Modified: xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/impl/dtd/DTDGrammar.java Wed Jul 4
19:56:10 2012
@@ -19,6 +19,7 @@ package org.apache.xerces.impl.dtd;
import java.util.ArrayList;
import java.util.Hashtable;
+import java.util.Random;
import org.apache.xerces.impl.dtd.models.CMAny;
import org.apache.xerces.impl.dtd.models.CMBinOp;
@@ -2639,6 +2640,26 @@ public class DTDGrammar
* @author Andy Clark, IBM
*/
protected static final class QNameHashtable {
+
+ private static final class PrimeNumberSequenceGenerator {
+
+ private static int [] PRIMES = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
47, 53, 59,
+ 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
127, 131, 137,
+ 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199,
211, 223, 227,
+ 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293,
307, 311, 313,
+ 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397,
401, 409, 419,
+ 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491,
499, 503, 509,
+ 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601,
607, 613, 617,
+ 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701,
709, 719, 727};
+
+ static void generateSequence(int[] arrayToFill) {
+ Random r = new Random();
+ for (int i = 0; i < arrayToFill.length; ++i) {
+ arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)];
+ }
+ }
+ }
//
// Constants
@@ -2651,11 +2672,29 @@ public class DTDGrammar
// that we get a better distribution for hashing. -Ac
/** Hashtable size (101). */
private static final int HASHTABLE_SIZE = 101;
+
+ /** Maximum hash collisions per bucket for a table with load factor ==
1. */
+ private static final int MAX_HASH_COLLISIONS = 40;
+
+ private static final int MULTIPLIERS_SIZE = 1 << 5;
+ private static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
//
// Data
//
private Object[][] fHashTable = new Object[HASHTABLE_SIZE][];
+
+ /** actual table size **/
+ private int fTableSize = HASHTABLE_SIZE;
+
+ /** The total number of entries in the hash table. */
+ private int fCount = 0;
+
+ /**
+ * Array of randomly selected hash function multipliers or
<code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ private int[] fHashMultipliers;
//
// Public methods
@@ -2663,7 +2702,7 @@ public class DTDGrammar
/** Associates the given value with the specified key tuple. */
public void put(String key, int value) {
- int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE;
+ int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
Object[] bucket = fHashTable[hash];
if (bucket == null) {
@@ -2672,6 +2711,11 @@ public class DTDGrammar
bucket[1] = key;
bucket[2] = new int[]{value};
fHashTable[hash] = bucket;
+ if (++fCount > fTableSize) {
+ // Rehash the table if the number of entries
+ // would exceed the number of buckets.
+ rehash();
+ }
} else {
int count = ((int[])bucket[0])[0];
int offset = 1 + 2*count;
@@ -2692,10 +2736,20 @@ public class DTDGrammar
}
j += 2;
}
- if (! found) {
+ if (!found) {
bucket[offset++] = key;
bucket[offset]= new int[]{value};
((int[])bucket[0])[0] = ++count;
+ if (++fCount > fTableSize) {
+ // Rehash the table if the number of entries
+ // would exceed the number of buckets.
+ rehash();
+ }
+ else if (count > MAX_HASH_COLLISIONS) {
+ // Select a new hash function and rehash the table if
+ // MAX_HASH_COLLISIONS is exceeded.
+ rebalance();
+ }
}
}
@@ -2706,7 +2760,7 @@ public class DTDGrammar
/** Returns the value associated with the specified key tuple. */
public int get(String key) {
- int hash = (key.hashCode() & 0x7FFFFFFF) % HASHTABLE_SIZE;
+ int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
Object[] bucket = fHashTable[hash];
if (bucket == null) {
@@ -2724,6 +2778,93 @@ public class DTDGrammar
return -1;
} // get(int,String,String)
+
+ public int hash(String symbol) {
+ if (fHashMultipliers == null) {
+ return symbol.hashCode();
+ }
+ return hash0(symbol);
+ } // hash(String):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] +
symbol.charAt(i);
+ }
+ return code;
+ } // hash0(String):int
+
+ private void rehash() {
+ rehashCommon(fHashTable.length * 2 + 1);
+ } // rehash()
+
+ private void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fHashTable.length);
+ } // rebalance()
+
+ private void rehashCommon(final int newCapacity) {
+
+ final int oldCapacity = fHashTable.length;
+ final Object[][] oldTable = fHashTable;
+
+ final Object[][] newTable = new Object[newCapacity][];
+
+ fHashTable = newTable;
+ fTableSize = fHashTable.length;
+
+ for (int i = 0; i < oldCapacity; ++i) {
+ final Object[] oldBucket = oldTable[i];
+ if (oldBucket != null) {
+ final int oldCount = ((int[]) oldBucket[0])[0];
+ boolean oldBucketReused = false;
+ int k = 1;
+ for (int j = 0; j < oldCount; ++j) {
+ final String key = (String) oldBucket[k];
+ final Object value = oldBucket[k+1];
+
+ final int hash = (hash(key) & 0x7FFFFFFF) % fTableSize;
+ Object[] bucket = fHashTable[hash];
+
+ if (bucket == null) {
+ if (oldBucketReused) {
+ bucket = new Object[1 + 2*INITIAL_BUCKET_SIZE];
+ bucket[0] = new int[]{1};
+ }
+ else {
+ bucket = oldBucket;
+ ((int[])bucket[0])[0] = 1;
+ oldBucketReused = true;
+ }
+ bucket[1] = key;
+ bucket[2] = value;
+ fHashTable[hash] = bucket;
+ }
+ else {
+ int count = ((int[])bucket[0])[0];
+ int offset = 1 + 2*count;
+ if (offset == bucket.length) {
+ int newSize = count + INITIAL_BUCKET_SIZE;
+ Object[] newBucket = new Object[1 + 2*newSize];
+ System.arraycopy(bucket, 0, newBucket, 0,
offset);
+ bucket = newBucket;
+ fHashTable[hash] = bucket;
+ }
+ bucket[offset++] = key;
+ bucket[offset]= value;
+ ((int[])bucket[0])[0] = ++count;
+ }
+ k += 2;
+ }
+ }
+ }
+
+ } // rehashCommon(int)
} // class QNameHashtable
Added:
xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java?rev=1357381&view=auto
==============================================================================
---
xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
(added)
+++
xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
Wed Jul 4 19:56:10 2012
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.xerces.util;
+
+import java.util.Random;
+
+/**
+ * @version $Id$
+ */
+final class PrimeNumberSequenceGenerator {
+
+ private static int [] PRIMES = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
53, 59,
+ 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127,
131, 137,
+ 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
223, 227,
+ 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307,
311, 313,
+ 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401,
409, 419,
+ 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499,
503, 509,
+ 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607,
613, 617,
+ 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709,
719, 727};
+
+ static void generateSequence(int[] arrayToFill) {
+ Random r = new Random();
+ for (int i = 0; i < arrayToFill.length; ++i) {
+ arrayToFill[i] = PRIMES[r.nextInt(PRIMES.length)];
+ }
+ }
+}
Propchange:
xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
xerces/java/trunk/src/org/apache/xerces/util/PrimeNumberSequenceGenerator.java
------------------------------------------------------------------------------
svn:keywords = Author Date Id Revision
Modified:
xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
(original)
+++ xerces/java/trunk/src/org/apache/xerces/util/SoftReferenceSymbolTable.java
Wed Jul 4 19:56:10 2012
@@ -127,6 +127,7 @@ public class SoftReferenceSymbolTable ex
public String addSymbol(String symbol) {
clean();
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(symbol) % fTableSize;
for (SREntry entry = fBuckets[bucket]; entry != null; entry =
entry.next) {
SREntryData data = (SREntryData)entry.get();
@@ -136,13 +137,20 @@ public class SoftReferenceSymbolTable ex
if (data.symbol.equals(symbol)) {
return data.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(symbol) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(symbol) % fTableSize;
+ }
// add new entry
symbol = symbol.intern();
@@ -165,6 +173,7 @@ public class SoftReferenceSymbolTable ex
public String addSymbol(char[] buffer, int offset, int length) {
clean();
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (SREntry entry = fBuckets[bucket]; entry != null; entry =
entry.next) {
SREntryData data = (SREntryData)entry.get();
@@ -174,18 +183,26 @@ public class SoftReferenceSymbolTable ex
if (length == data.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != data.characters[i]) {
+ ++collisionCount;
continue OUTER;
}
}
return data.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(buffer, offset, length) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(buffer, offset, length) % fTableSize;
+ }
// add new entry
String symbol = new String(buffer, offset, length).intern();
@@ -218,6 +235,20 @@ public class SoftReferenceSymbolTable ex
rehashCommon(((int) (fCount / fLoadFactor)) * 2 + 1);
}
+ /**
+ * Randomly selects a new hash function and reorganizes this SymbolTable
+ * in order to more evenly distribute its entries across the table. This
+ * method is called automatically when the number keys in one of the
+ * SymbolTable's buckets exceeds the given collision threshold.
+ */
+ protected void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fBuckets.length);
+ }
+
private void rehashCommon(final int newCapacity) {
final int oldCapacity = fBuckets.length;
@@ -235,7 +266,7 @@ public class SoftReferenceSymbolTable ex
SREntryData data = (SREntryData)e.get();
if (data != null) {
- int index = hash(data.characters, 0,
data.characters.length) % newCapacity;
+ int index = hash(data.symbol) % newCapacity;
if (newTable[index] != null) {
newTable[index].prev = e;
}
Modified: xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/util/SymbolHash.java Wed Jul 4
19:56:10 2012
@@ -31,19 +31,34 @@ public class SymbolHash {
//
// Constants
//
-
+
/** Default table size. */
- protected int fTableSize = 101;
+ protected static final int TABLE_SIZE = 101;
+
+ /** Maximum hash collisions per bucket. */
+ protected static final int MAX_HASH_COLLISIONS = 40;
+
+ protected static final int MULTIPLIERS_SIZE = 1 << 5;
+ protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
//
// Data
//
+
+ /** Actual table size **/
+ protected int fTableSize;
/** Buckets. */
protected Entry[] fBuckets;
/** Number of elements. */
protected int fNum = 0;
+
+ /**
+ * Array of randomly selected hash function multipliers or
<code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ protected int[] fHashMultipliers;
//
// Constructors
@@ -51,7 +66,7 @@ public class SymbolHash {
/** Constructs a key table with the default size. */
public SymbolHash() {
- fBuckets = new Entry[fTableSize];
+ this(TABLE_SIZE);
}
/**
@@ -77,26 +92,37 @@ public class SymbolHash {
* @param value
*/
public void put(Object key, Object value) {
+
+ // search for identical key
+ int collisionCount = 0;
final int hash = hash(key);
int bucket = hash % fTableSize;
- Entry entry = search(key, bucket);
-
- // replace old value
- if (entry != null) {
- entry.value = value;
- }
- // create new entry
- else {
- if (fNum >= fTableSize) {
- // Rehash the table if the number of entries
- // would exceed the number of buckets.
- rehash();
- bucket = hash % fTableSize;
+ for (Entry entry = fBuckets[bucket]; entry != null; entry =
entry.next) {
+ if (key.equals(entry.key)) {
+ // replace old value
+ entry.value = value;
+ return;
}
- entry = new Entry(key, value, fBuckets[bucket]);
- fBuckets[bucket] = entry;
- fNum++;
+ ++collisionCount;
+ }
+
+ if (fNum >= fTableSize) {
+ // Rehash the table if the number of entries
+ // would exceed the number of buckets.
+ rehash();
+ bucket = hash % fTableSize;
+ }
+ else if (collisionCount >= MAX_HASH_COLLISIONS && key instanceof
String) {
+ // Select a new hash function and rehash the table if
+ // MAX_HASH_COLLISIONS is exceeded.
+ rebalance();
+ bucket = hash(key) % fTableSize;
}
+
+ // create new entry
+ Entry entry = new Entry(key, value, fBuckets[bucket]);
+ fBuckets[bucket] = entry;
+ ++fNum;
}
/**
@@ -161,6 +187,7 @@ public class SymbolHash {
public SymbolHash makeClone() {
SymbolHash newTable = new SymbolHash(fTableSize);
newTable.fNum = fNum;
+ newTable.fHashMultipliers = fHashMultipliers != null ? (int[])
fHashMultipliers.clone() : null;
for (int i = 0; i < fTableSize; i++) {
if (fBuckets[i] != null) {
newTable.fBuckets[i] = fBuckets[i].makeClone();
@@ -178,6 +205,7 @@ public class SymbolHash {
fBuckets[i] = null;
}
fNum = 0;
+ fHashMultipliers = null;
} // clear(): void
protected Entry search(Object key, int bucket) {
@@ -195,8 +223,21 @@ public class SymbolHash {
* @param key The key to hash.
*/
protected int hash(Object key) {
- return key.hashCode() & 0x7FFFFFFF;
- }
+ if (fHashMultipliers == null || !(key instanceof String)) {
+ return key.hashCode() & 0x7FFFFFFF;
+ }
+ return hash0((String) key);
+ } // hash(Object):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+ }
+ return code & 0x7FFFFFFF;
+ } // hash0(String):int
/**
* Increases the capacity of and internally reorganizes this
@@ -205,11 +246,28 @@ public class SymbolHash {
* number of keys in the SymbolHash exceeds its number of buckets.
*/
protected void rehash() {
-
+ rehashCommon((fBuckets.length << 1) + 1);
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes this SymbolHash
+ * in order to more evenly distribute its entries across the table. This
+ * method is called automatically when the number keys in one of the
+ * SymbolHash's buckets exceeds MAX_HASH_COLLISIONS.
+ */
+ protected void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fBuckets.length);
+ }
+
+ private void rehashCommon(final int newCapacity) {
+
final int oldCapacity = fBuckets.length;
final Entry[] oldTable = fBuckets;
- final int newCapacity = (oldCapacity << 1) + 1;
final Entry[] newTable = new Entry[newCapacity];
fBuckets = newTable;
Modified: xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java (original)
+++ xerces/java/trunk/src/org/apache/xerces/util/SymbolTable.java Wed Jul 4
19:56:10 2012
@@ -84,6 +84,12 @@ public class SymbolTable {
/** Default table size. */
protected static final int TABLE_SIZE = 101;
+
+ /** Maximum hash collisions per bucket for a table with load factor == 1.
*/
+ protected static final int MAX_HASH_COLLISIONS = 40;
+
+ protected static final int MULTIPLIERS_SIZE = 1 << 5;
+ protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
//
// Data
@@ -104,6 +110,18 @@ public class SymbolTable {
/** The load factor for the SymbolTable. */
protected float fLoadFactor;
+
+ /**
+ * A new hash function is selected and the table is rehashed when
+ * the number of keys in the bucket exceeds this threshold.
+ */
+ protected final int fCollisionThreshold;
+
+ /**
+ * Array of randomly selected hash function multipliers or
<code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ protected int[] fHashMultipliers;
//
// Constructors
@@ -136,6 +154,7 @@ public class SymbolTable {
fTableSize = initialCapacity;
fBuckets = new Entry[fTableSize];
fThreshold = (int)(fTableSize * loadFactor);
+ fCollisionThreshold = (int)(MAX_HASH_COLLISIONS * loadFactor);
fCount = 0;
}
@@ -174,18 +193,26 @@ public class SymbolTable {
public String addSymbol(String symbol) {
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(symbol) % fTableSize;
for (Entry entry = fBuckets[bucket]; entry != null; entry =
entry.next) {
if (entry.symbol.equals(symbol)) {
return entry.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(symbol) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(symbol) % fTableSize;
+ }
// create new entry
Entry entry = new Entry(symbol, fBuckets[bucket]);
@@ -208,23 +235,32 @@ public class SymbolTable {
public String addSymbol(char[] buffer, int offset, int length) {
// search for identical symbol
+ int collisionCount = 0;
int bucket = hash(buffer, offset, length) % fTableSize;
OUTER: for (Entry entry = fBuckets[bucket]; entry != null; entry =
entry.next) {
if (length == entry.characters.length) {
for (int i = 0; i < length; i++) {
if (buffer[offset + i] != entry.characters[i]) {
+ ++collisionCount;
continue OUTER;
}
}
return entry.symbol;
}
+ ++collisionCount;
}
if (fCount >= fThreshold) {
// Rehash the table if the threshold is exceeded
rehash();
bucket = hash(buffer, offset, length) % fTableSize;
- }
+ }
+ else if (collisionCount >= fCollisionThreshold) {
+ // Select a new hash function and rehash the table if
+ // the collision threshold is exceeded.
+ rebalance();
+ bucket = hash(buffer, offset, length) % fTableSize;
+ }
// add new entry
Entry entry = new Entry(buffer, offset, length, fBuckets[bucket]);
@@ -243,8 +279,21 @@ public class SymbolTable {
* @param symbol The symbol to hash.
*/
public int hash(String symbol) {
- return symbol.hashCode() & 0x7FFFFFFF;
+ if (fHashMultipliers == null) {
+ return symbol.hashCode() & 0x7FFFFFFF;
+ }
+ return hash0(symbol);
} // hash(String):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+ }
+ return code & 0x7FFFFFFF;
+ } // hash0(String):int
/**
* Returns a hashcode value for the specified symbol information.
@@ -258,14 +307,25 @@ public class SymbolTable {
* @param length The length of the symbol.
*/
public int hash(char[] buffer, int offset, int length) {
+ if (fHashMultipliers == null) {
+ int code = 0;
+ for (int i = 0; i < length; ++i) {
+ code = code * 31 + buffer[offset + i];
+ }
+ return code & 0x7FFFFFFF;
+ }
+ return hash0(buffer, offset, length);
+ } // hash(char[],int,int):int
+
+ private int hash0(char[] buffer, int offset, int length) {
int code = 0;
+ final int[] multipliers = fHashMultipliers;
for (int i = 0; i < length; ++i) {
- code = code * 31 + buffer[offset + i];
+ code = code * multipliers[i & MULTIPLIERS_MASK] + buffer[offset +
i];
}
return code & 0x7FFFFFFF;
-
- } // hash(char[],int,int):int
+ } // hash0(char[],int,int):int
/**
* Increases the capacity of and internally reorganizes this
@@ -275,11 +335,28 @@ public class SymbolTable {
* and load factor.
*/
protected void rehash() {
-
+ rehashCommon(fBuckets.length * 2 + 1);
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes this SymbolTable
+ * in order to more evenly distribute its entries across the table. This
+ * method is called automatically when the number keys in one of the
+ * SymbolTable's buckets exceeds the given collision threshold.
+ */
+ protected void rebalance() {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ rehashCommon(fBuckets.length);
+ }
+
+ private void rehashCommon(final int newCapacity) {
+
int oldCapacity = fBuckets.length;
Entry[] oldTable = fBuckets;
- int newCapacity = oldCapacity * 2 + 1;
Entry[] newTable = new Entry[newCapacity];
fThreshold = (int)(newCapacity * fLoadFactor);
@@ -291,7 +368,7 @@ public class SymbolTable {
Entry e = old;
old = old.next;
- int index = hash(e.characters, 0, e.characters.length) %
newCapacity;
+ int index = hash(e.symbol) % newCapacity;
e.next = newTable[index];
newTable[index] = e;
}
Modified: xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java
URL:
http://svn.apache.org/viewvc/xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java?rev=1357381&r1=1357380&r2=1357381&view=diff
==============================================================================
--- xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java
(original)
+++ xerces/java/trunk/src/org/apache/xerces/util/XMLAttributesImpl.java Wed Jul
4 19:56:10 2012
@@ -50,6 +50,12 @@ public class XMLAttributesImpl
/** Default table size. */
protected static final int TABLE_SIZE = 101;
+ /** Maximum hash collisions per bucket. */
+ protected static final int MAX_HASH_COLLISIONS = 40;
+
+ protected static final int MULTIPLIERS_SIZE = 1 << 5;
+ protected static final int MULTIPLIERS_MASK = MULTIPLIERS_SIZE - 1;
+
/**
* Threshold at which an instance is treated
* as a large attribute list.
@@ -103,6 +109,12 @@ public class XMLAttributesImpl
* Indicates whether the table view contains consistent data.
*/
protected boolean fIsTableViewConsistent;
+
+ /**
+ * Array of randomly selected hash function multipliers or
<code>null</code>
+ * if the default String.hashCode() function should be used.
+ */
+ protected int[] fHashMultipliers;
//
// Constructors
@@ -203,7 +215,8 @@ public class XMLAttributesImpl
* the user of this class adds attributes, removes them, and
* then adds more.
*/
- if (!fIsTableViewConsistent || fLength == SIZE_LIMIT) {
+ if (!fIsTableViewConsistent || fLength == SIZE_LIMIT ||
+ (fLength > SIZE_LIMIT && fLength > fTableViewBuckets)) {
prepareAndPopulateTableView();
fIsTableViewConsistent = true;
}
@@ -232,12 +245,14 @@ public class XMLAttributesImpl
// We need to check if any of the attributes has the same rawname.
else {
// Search the table.
+ int collisionCount = 0;
Attribute found = fAttributeTableView[bucket];
while (found != null) {
if (found.name.rawname == name.rawname) {
break;
}
found = found.next;
+ ++collisionCount;
}
// This attribute is unique.
if (found == null) {
@@ -250,10 +265,19 @@ public class XMLAttributesImpl
}
fAttributes = attributes;
}
-
- // Update table view
- fAttributes[index].next = fAttributeTableView[bucket];
- fAttributeTableView[bucket] = fAttributes[index];
+ // Select a new hash function and rehash the table view
+ // if the collision threshold is exceeded.
+ if (collisionCount >= MAX_HASH_COLLISIONS) {
+ // The current attribute will be processed in the
rehash.
+ // Need to set its name first.
+ fAttributes[index].name.setValues(name);
+ rebalanceTableView(fLength);
+ }
+ else {
+ // Update table view
+ fAttributes[index].next = fAttributeTableView[bucket];
+ fAttributeTableView[bucket] = fAttributes[index];
+ }
}
// Duplicate. We still need to find the index.
else {
@@ -829,60 +853,84 @@ public class XMLAttributesImpl
*/
public QName checkDuplicatesNS() {
// If the list is small check for duplicates using pairwise comparison.
- if (fLength <= SIZE_LIMIT) {
- for (int i = 0; i < fLength - 1; ++i) {
- Attribute att1 = fAttributes[i];
- for (int j = i + 1; j < fLength; ++j) {
- Attribute att2 = fAttributes[j];
+ final int length = fLength;
+ if (length <= SIZE_LIMIT) {
+ final Attribute[] attributes = fAttributes;
+ for (int i = 0; i < length - 1; ++i) {
+ Attribute att1 = attributes[i];
+ for (int j = i + 1; j < length; ++j) {
+ Attribute att2 = attributes[j];
if (att1.name.localpart == att2.name.localpart &&
att1.name.uri == att2.name.uri) {
- return att2.name;
+ return att2.name;
}
}
}
+ return null;
}
// If the list is large check duplicates using a hash table.
else {
- // We don't want this table view to be read if someone calls
- // addAttribute so we invalidate it up front.
- fIsTableViewConsistent = false;
-
- prepareTableView();
-
- Attribute attr;
- int bucket;
-
- for (int i = fLength - 1; i >= 0; --i) {
- attr = fAttributes[i];
- bucket = getTableViewBucket(attr.name.localpart,
attr.name.uri);
+ return checkManyDuplicatesNS();
+ }
+ }
+
+ private QName checkManyDuplicatesNS() {
+ // We don't want this table view to be read if someone calls
+ // addAttribute so we invalidate it up front.
+ fIsTableViewConsistent = false;
+
+ prepareTableView();
+
+ Attribute attr;
+ int bucket;
+
+ final int length = fLength;
+ final Attribute[] attributes = fAttributes;
+ final Attribute[] attributeTableView = fAttributeTableView;
+ final int[] attributeTableViewChainState =
fAttributeTableViewChainState;
+ int largeCount = fLargeCount;
+
+ for (int i = 0; i < length; ++i) {
+ attr = attributes[i];
+ bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+
+ // The chain is stale.
+ // This must be a unique attribute.
+ if (attributeTableViewChainState[bucket] != largeCount) {
+ attributeTableViewChainState[bucket] = largeCount;
+ attr.next = null;
+ attributeTableView[bucket] = attr;
+ }
+ // This chain is active.
+ // We need to check if any of the attributes has the same name.
+ else {
+ // Search the table.
+ int collisionCount = 0;
+ Attribute found = attributeTableView[bucket];
+ while (found != null) {
+ if (found.name.localpart == attr.name.localpart &&
+ found.name.uri == attr.name.uri) {
+ return attr.name;
+ }
+ found = found.next;
+ ++collisionCount;
+ }
- // The chain is stale.
- // This must be a unique attribute.
- if (fAttributeTableViewChainState[bucket] != fLargeCount) {
- fAttributeTableViewChainState[bucket] = fLargeCount;
- attr.next = null;
- fAttributeTableView[bucket] = attr;
- }
- // This chain is active.
- // We need to check if any of the attributes has the same name.
+ // Select a new hash function and rehash the table view
+ // if the collision threshold is exceeded.
+ if (collisionCount >= MAX_HASH_COLLISIONS) {
+ // The current attribute will be processed in the rehash.
+ rebalanceTableViewNS(i+1);
+ largeCount = fLargeCount;
+ }
else {
- // Search the table.
- Attribute found = fAttributeTableView[bucket];
- while (found != null) {
- if (found.name.localpart == attr.name.localpart &&
- found.name.uri == attr.name.uri) {
- return attr.name;
- }
- found = found.next;
- }
-
// Update table view
- attr.next = fAttributeTableView[bucket];
- fAttributeTableView[bucket] = attr;
+ attr.next = attributeTableView[bucket];
+ attributeTableView[bucket] = attr;
}
}
- }
- return null;
+ }
+ return null;
}
/**
@@ -933,7 +981,7 @@ public class XMLAttributesImpl
* would be hashed
*/
protected int getTableViewBucket(String qname) {
- return (qname.hashCode() & 0x7FFFFFFF) % fTableViewBuckets;
+ return (hash(qname) & 0x7FFFFFFF) % fTableViewBuckets;
}
/**
@@ -947,13 +995,36 @@ public class XMLAttributesImpl
*/
protected int getTableViewBucket(String localpart, String uri) {
if (uri == null) {
- return (localpart.hashCode() & 0x7FFFFFFF) % fTableViewBuckets;
+ return (hash(localpart) & 0x7FFFFFFF) % fTableViewBuckets;
}
else {
- return ((localpart.hashCode() + uri.hashCode())
- & 0x7FFFFFFF) % fTableViewBuckets;
+ return (hash(localpart, uri) & 0x7FFFFFFF) % fTableViewBuckets;
}
}
+
+ private int hash(String localpart) {
+ if (fHashMultipliers == null) {
+ return localpart.hashCode();
+ }
+ return hash0(localpart);
+ } // hash(String):int
+
+ private int hash(String localpart, String uri) {
+ if (fHashMultipliers == null) {
+ return localpart.hashCode() + uri.hashCode() * 31;
+ }
+ return hash0(localpart) + hash0(uri) *
fHashMultipliers[MULTIPLIERS_SIZE];
+ } // hash(String,String):int
+
+ private int hash0(String symbol) {
+ int code = 0;
+ final int length = symbol.length();
+ final int[] multipliers = fHashMultipliers;
+ for (int i = 0; i < length; ++i) {
+ code = code * multipliers[i & MULTIPLIERS_MASK] + symbol.charAt(i);
+ }
+ return code;
+ } // hash0(String):int
/**
* Purges all elements from the table view.
@@ -971,9 +1042,31 @@ public class XMLAttributesImpl
}
/**
+ * Increases the capacity of the table view.
+ */
+ private void growTableView() {
+ final int length = fLength;
+ int tableViewBuckets = fTableViewBuckets;
+ do {
+ tableViewBuckets = (tableViewBuckets << 1) + 1;
+ if (tableViewBuckets < 0) {
+ tableViewBuckets = Integer.MAX_VALUE;
+ break;
+ }
+ }
+ while (length > tableViewBuckets);
+ fTableViewBuckets = tableViewBuckets;
+ fAttributeTableView = null;
+ fLargeCount = 1;
+ }
+
+ /**
* Prepares the table view of the attributes list for use.
*/
protected void prepareTableView() {
+ if (fLength > fTableViewBuckets) {
+ growTableView();
+ }
if (fAttributeTableView == null) {
fAttributeTableView = new Attribute[fTableViewBuckets];
fAttributeTableViewChainState = new int[fTableViewBuckets];
@@ -989,11 +1082,15 @@ public class XMLAttributesImpl
* previously read.
*/
protected void prepareAndPopulateTableView() {
+ prepareAndPopulateTableView(fLength);
+ }
+
+ private void prepareAndPopulateTableView(final int count) {
prepareTableView();
- // Need to populate the hash table with the attributes we've scanned
so far.
+ // Need to populate the hash table with the attributes we've processed
so far.
Attribute attr;
int bucket;
- for (int i = 0; i < fLength; ++i) {
+ for (int i = 0; i < count; ++i) {
attr = fAttributes[i];
bucket = getTableViewBucket(attr.name.rawname);
if (fAttributeTableViewChainState[bucket] != fLargeCount) {
@@ -1008,6 +1105,55 @@ public class XMLAttributesImpl
}
}
}
+
+ private void prepareAndPopulateTableViewNS(final int count) {
+ prepareTableView();
+ // Need to populate the hash table with the attributes we've processed
so far.
+ Attribute attr;
+ int bucket;
+ for (int i = 0; i < count; ++i) {
+ attr = fAttributes[i];
+ bucket = getTableViewBucket(attr.name.localpart, attr.name.uri);
+ if (fAttributeTableViewChainState[bucket] != fLargeCount) {
+ fAttributeTableViewChainState[bucket] = fLargeCount;
+ attr.next = null;
+ fAttributeTableView[bucket] = attr;
+ }
+ else {
+ // Update table view
+ attr.next = fAttributeTableView[bucket];
+ fAttributeTableView[bucket] = attr;
+ }
+ }
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes the table view
+ * in order to more evenly distribute its entries. This method is called
+ * automatically when the number of attributes in one bucket exceeds
+ * MAX_HASH_COLLISIONS.
+ */
+ private void rebalanceTableView(final int count) {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE + 1];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ prepareAndPopulateTableView(count);
+ }
+
+ /**
+ * Randomly selects a new hash function and reorganizes the table view
+ * in order to more evenly distribute its entries. This method is called
+ * automatically when the number of attributes in one bucket exceeds
+ * MAX_HASH_COLLISIONS.
+ */
+ private void rebalanceTableViewNS(final int count) {
+ if (fHashMultipliers == null) {
+ fHashMultipliers = new int[MULTIPLIERS_SIZE + 1];
+ }
+ PrimeNumberSequenceGenerator.generateSequence(fHashMultipliers);
+ prepareAndPopulateTableViewNS(count);
+ }
//
// Classes
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]