Author: amassari
Date: Tue Jul 28 09:08:05 2009
New Revision: 798456
URL: http://svn.apache.org/viewvc?rev=798456&view=rev
Log:
Improve the scalability of identity-constraint checking by storing the value
tuples in a hash table whose hash is computed from the canonical
representations of the field values (XERCESC-1878)
Added:
xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/
xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml
xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd
xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/valid.xml
Modified:
xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp
xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp
xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp
xerces/c/trunk/tests/src/XSTSHarness/regression/Xerces.testSet
Modified: xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp?rev=798456&r1=798455&r2=798456&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp (original)
+++ xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp Tue Jul 28 09:08:05 2009
@@ -33,6 +33,98 @@
XERCES_CPP_NAMESPACE_BEGIN
+//
+// ---------------------------------------------------------------------------
+// ICValueHasher: the hasher for identity constraints values
+// ---------------------------------------------------------------------------
+XMLSize_t ICValueHasher::getHashVal(const void* key, XMLSize_t mod) const
+{
+ const FieldValueMap* valueMap=(const FieldValueMap*)key;
+ XMLSize_t hashVal = 0;
+
+ XMLSize_t size = valueMap->size();
+ for (XMLSize_t j=0; j<size; j++) {
+ DatatypeValidator* const dv = valueMap->getDatatypeValidatorAt(j);
+ const XMLCh* const val = valueMap->getValueAt(j);
+ const XMLCh* canonVal = (dv &&
val)?dv->getCanonicalRepresentation(val, fMemoryManager):0;
+ if(canonVal)
+ {
+ hashVal += XMLString::hash(canonVal, mod);
+ fMemoryManager->deallocate((void*)canonVal);
+ }
+ else if(val)
+ hashVal += XMLString::hash(val, mod);
+ }
+
+ return hashVal % mod;
+}
+
+bool ICValueHasher::equals(const void *const key1, const void *const key2)
const
+{
+ const FieldValueMap* left=(const FieldValueMap*)key1;
+ const FieldValueMap* right=(const FieldValueMap*)key2;
+
+ XMLSize_t lSize = left->size();
+ XMLSize_t rSize = right->size();
+ if (lSize == rSize)
+ {
+ bool matchFound = true;
+
+ for (XMLSize_t j=0; j<rSize; j++) {
+ if (!isDuplicateOf(left->getDatatypeValidatorAt(j),
left->getValueAt(j),
+ right->getDatatypeValidatorAt(j),
right->getValueAt(j))) {
+ matchFound = false;
+ break;
+ }
+ }
+
+ if (matchFound) { // found it
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ICValueHasher::isDuplicateOf(DatatypeValidator* const dv1, const XMLCh*
const val1,
+ DatatypeValidator* const dv2, const XMLCh*
const val2) const
+{
+
+ // if either validator's null, fall back on string comparison
+ if(!dv1 || !dv2) {
+ return (XMLString::equals(val1, val2));
+ }
+
+ bool val1IsEmpty = (val1==0 || *val1==0);
+ bool val2IsEmpty = (val2==0 || *val2==0);
+
+ if (val1IsEmpty && val2IsEmpty) {
+
+ if (dv1 == dv2) {
+ return true;
+ }
+
+ return false;
+ }
+
+ if (val1IsEmpty || val2IsEmpty) {
+ return false;
+ }
+
+ // find the common ancestor, if there is one
+ DatatypeValidator* tempVal1 = dv1;
+ while(tempVal1)
+ {
+ DatatypeValidator* tempVal2 = dv2;
+ for(; tempVal2 != NULL && tempVal2 != tempVal1; tempVal2 =
tempVal2->getBaseValidator()) ;
+ if (tempVal2)
+ return ((tempVal2->compare(val1, val2, fMemoryManager)) == 0);
+ tempVal1=tempVal1->getBaseValidator();
+ }
+
+ // if we're here it means the types weren't related. They are different:
+ return false;
+}
+
// ---------------------------------------------------------------------------
// ValueStore: Constructors and Destructor
// ---------------------------------------------------------------------------
@@ -98,10 +190,11 @@
// store values
if (!fValueTuples) {
- fValueTuples = new (fMemoryManager) RefVectorOf<FieldValueMap>(4,
true, fMemoryManager);
+ fValueTuples = new (fMemoryManager) RefHashTableOf<FieldValueMap,
ICValueHasher>(107, true, ICValueHasher(fMemoryManager), fMemoryManager);
}
- fValueTuples->addElement(new (fMemoryManager) FieldValueMap(fValues));
+ FieldValueMap* pICItem = new (fMemoryManager) FieldValueMap(fValues);
+ fValueTuples->put(pICItem, pICItem);
}
}
@@ -111,19 +204,19 @@
return;
}
- XMLSize_t tupleSize = other->fValueTuples->size();
-
- for (XMLSize_t i=0; i<tupleSize; i++) {
-
- FieldValueMap* valueMap = other->fValueTuples->elementAt(i);
+ RefHashTableOfEnumerator<FieldValueMap, ICValueHasher>
iter(other->fValueTuples, false, fMemoryManager);
+ while(iter.hasMoreElements())
+ {
+ FieldValueMap& valueMap = iter.nextElement();
- if (!contains(valueMap)) {
+ if (!contains(&valueMap)) {
if (!fValueTuples) {
- fValueTuples = new (fMemoryManager)
RefVectorOf<FieldValueMap>(4, true, fMemoryManager);
+ fValueTuples = new (fMemoryManager)
RefHashTableOf<FieldValueMap, ICValueHasher>(107, true,
ICValueHasher(fMemoryManager), fMemoryManager);
}
- fValueTuples->addElement(new (fMemoryManager)
FieldValueMap(*valueMap));
+ FieldValueMap* pICItem = new (fMemoryManager)
FieldValueMap(valueMap);
+ fValueTuples->put(pICItem, pICItem);
}
}
}
@@ -164,73 +257,9 @@
bool ValueStore::contains(const FieldValueMap* const other) {
- if (fValueTuples) {
-
- XMLSize_t otherSize = other->size();
- XMLSize_t tupleSize = fValueTuples->size();
-
- for (XMLSize_t i=0; i<tupleSize; i++) {
-
- FieldValueMap* valueMap = fValueTuples->elementAt(i);
-
- if (otherSize == valueMap->size()) {
-
- bool matchFound = true;
-
- for (XMLSize_t j=0; j<otherSize; j++) {
- if (!isDuplicateOf(valueMap->getDatatypeValidatorAt(j),
valueMap->getValueAt(j),
- other->getDatatypeValidatorAt(j),
other->getValueAt(j))) {
- matchFound = false;
- break;
- }
- }
-
- if (matchFound) { // found it
- return true;
- }
- }
- }
- }
-
- return false;
-}
-
-bool ValueStore::isDuplicateOf(DatatypeValidator* const dv1, const XMLCh*
const val1,
- DatatypeValidator* const dv2, const XMLCh*
const val2) {
-
- // if either validator's null, fall back on string comparison
- if(!dv1 || !dv2) {
- return (XMLString::equals(val1, val2));
- }
-
- bool val1IsEmpty = (val1==0 || *val1==0);
- bool val2IsEmpty = (val2==0 || *val2==0);
-
- if (val1IsEmpty && val2IsEmpty) {
-
- if (dv1 == dv2) {
- return true;
- }
-
- return false;
- }
-
- if (val1IsEmpty || val2IsEmpty) {
- return false;
- }
-
- // find the common ancestor, if there is one
- DatatypeValidator* tempVal1 = dv1;
- while(tempVal1)
- {
- DatatypeValidator* tempVal2 = dv2;
- for(; tempVal2 != NULL && tempVal2 != tempVal1; tempVal2 =
tempVal2->getBaseValidator()) ;
- if (tempVal2)
- return ((tempVal2->compare(val1, val2, fMemoryManager)) == 0);
- tempVal1=tempVal1->getBaseValidator();
- }
+ if (fValueTuples)
+ return fValueTuples->get(other)!=0;
- // if we're here it means the types weren't related. They are different:
return false;
}
@@ -239,7 +268,7 @@
fValuesCount=0;
fValues.clear();
if(fValueTuples)
- fValueTuples->removeAllElements();
+ fValueTuples->removeAll();
}
// ---------------------------------------------------------------------------
@@ -263,16 +292,18 @@
return;
}
- XMLSize_t count = (fValueTuples) ? fValueTuples->size() : 0;
-
- for (XMLSize_t i = 0; i < count; i++) {
-
- FieldValueMap* valueMap = fValueTuples->elementAt(i);
+ if(fValueTuples)
+ {
+ RefHashTableOfEnumerator<FieldValueMap, ICValueHasher>
iter(fValueTuples, false, fMemoryManager);
+ while(iter.hasMoreElements())
+ {
+ FieldValueMap& valueMap = iter.nextElement();
- if (!keyValueStore->contains(valueMap) && fDoReportError) {
+ if (!keyValueStore->contains(&valueMap) && fDoReportError) {
- fScanner->getValidator()->emitError(XMLValid::IC_KeyNotFound,
- fIdentityConstraint->getElementName());
+
fScanner->getValidator()->emitError(XMLValid::IC_KeyNotFound,
+ fIdentityConstraint->getElementName());
+ }
}
}
}
Modified: xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp?rev=798456&r1=798455&r2=798456&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp (original)
+++ xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp Tue Jul 28 09:08:05 2009
@@ -32,7 +32,7 @@
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/validators/schema/identity/FieldValueMap.hpp>
-#include <xercesc/util/RefVectorOf.hpp>
+#include <xercesc/util/RefHashTableOf.hpp>
XERCES_CPP_NAMESPACE_BEGIN
@@ -44,6 +44,30 @@
class XMLScanner;
class ValueStoreCache;
+struct ICValueHasher
+{
+ ICValueHasher(MemoryManager* const manager) : fMemoryManager(manager) {}
+
+ XMLSize_t getHashVal(const void* key, XMLSize_t mod) const;
+ bool equals(const void *const key1, const void *const key2) const;
+
+ // -----------------------------------------------------------------------
+ // Helper methods
+ // -----------------------------------------------------------------------
+ /**
+ * Returns whether a field associated <DatatypeValidator, String> value
+ * is a duplicate of another associated value.
+ * It is a duplicate only if either of these conditions are true:
+ * - The Datatypes are the same or related by derivation and the values
+ * are in the same valuespace.
+ * - The datatypes are unrelated and the values are Stringwise identical.
+ */
+ bool isDuplicateOf(DatatypeValidator* const dv1, const XMLCh* const val1,
+ DatatypeValidator* const dv2, const XMLCh* const val2)
const;
+
+
+ MemoryManager* fMemoryManager;
+};
class VALIDATORS_EXPORT ValueStore : public XMemory
{
@@ -93,28 +117,13 @@
ValueStore& operator= (const ValueStore& other);
// -----------------------------------------------------------------------
- // Helper methods
- // -----------------------------------------------------------------------
- /**
- * Returns whether a field associated <DatatypeValidator, String> value
- * is a duplicate of another associated value.
- * It is a duplicate only if either of these conditions are true:
- * - The Datatypes are the same or related by derivation and the values
- * are in the same valuespace.
- * - The datatypes are unrelated and the values are Stringwise identical.
- */
- bool isDuplicateOf(DatatypeValidator* const dv1, const XMLCh* const val1,
- DatatypeValidator* const dv2, const XMLCh* const val2);
-
-
- // -----------------------------------------------------------------------
// Data
// -----------------------------------------------------------------------
bool fDoReportError;
XMLSize_t fValuesCount;
IdentityConstraint* fIdentityConstraint;
FieldValueMap fValues;
- RefVectorOf<FieldValueMap>* fValueTuples;
+ RefHashTableOf<FieldValueMap, ICValueHasher>* fValueTuples;
XMLScanner* fScanner; // for error reporting - REVISIT
MemoryManager* fMemoryManager;
};
Modified: xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp?rev=798456&r1=798455&r2=798456&view=diff
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp (original)
+++ xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp Tue Jul 28 09:08:05 2009
@@ -196,7 +196,7 @@
try
{
fErrorHandler.resetErrors();
- for(unsigned int i=0;i<fCurrentTest.fXSDNames.size();i++)
+ for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
{
Grammar*
grammar=fParser->loadGrammar(fCurrentTest.fXSDNames.elementAt(i)->getURLText(),
Grammar::SchemaGrammarType, true);
success=(success && (grammar!=NULL));
@@ -226,7 +226,7 @@
// skip the rest of the group, as we had problems with the
schema itself
fCurrentTest.fSkipped=true;
fFailures++;
- for(unsigned int i=0;i<fCurrentTest.fXSDNames.size();i++)
+ for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
printFile(*fCurrentTest.fXSDNames.elementAt(i));
}
else
@@ -239,7 +239,7 @@
fCurrentTest.fSkipped=true;
fFailures++;
XERCES_STD_QUALIFIER cout << "Test " <<
StrX(fCurrentTest.fTestName) << " succeeded but was expected to fail" <<
XERCES_STD_QUALIFIER endl;
- for(unsigned int
i=0;i<fCurrentTest.fXSDNames.size();i++)
+ for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
printFile(*fCurrentTest.fXSDNames.elementAt(i));
}
}
@@ -252,7 +252,7 @@
fFailures++;
XERCES_STD_QUALIFIER cout << "Test " <<
StrX(fCurrentTest.fTestName) << " failed but was expected to pass" <<
XERCES_STD_QUALIFIER endl;
XERCES_STD_QUALIFIER cout << "Reported error: " <<
StrX(fErrorHandler.getErrorText()) << XERCES_STD_QUALIFIER endl;
- for(unsigned int
i=0;i<fCurrentTest.fXSDNames.size();i++)
+ for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
printFile(*fCurrentTest.fXSDNames.elementAt(i));
}
}
@@ -295,7 +295,7 @@
if(fatalFailure)
{
fFailures++;
- for(unsigned int i=0;i<fCurrentTest.fXSDNames.size();i++)
+ for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
printFile(*fCurrentTest.fXSDNames.elementAt(i));
printFile(fCurrentTest.fXMLName);
}
@@ -307,7 +307,7 @@
{
fFailures++;
XERCES_STD_QUALIFIER cout << "Test " <<
StrX(fCurrentTest.fTestName) << " succeeded but was expected to fail" <<
XERCES_STD_QUALIFIER endl;
- for(unsigned int
i=0;i<fCurrentTest.fXSDNames.size();i++)
+ for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
printFile(*fCurrentTest.fXSDNames.elementAt(i));
printFile(fCurrentTest.fXMLName);
}
@@ -319,7 +319,7 @@
fFailures++;
XERCES_STD_QUALIFIER cout << "Test " <<
StrX(fCurrentTest.fTestName) << " failed but was expected to pass" <<
XERCES_STD_QUALIFIER endl;
XERCES_STD_QUALIFIER cout << "Reported error: " <<
StrX(fErrorHandler.getErrorText()) << XERCES_STD_QUALIFIER endl;
- for(unsigned int
i=0;i<fCurrentTest.fXSDNames.size();i++)
+ for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
printFile(*fCurrentTest.fXSDNames.elementAt(i));
printFile(fCurrentTest.fXMLName);
}
Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml?rev=798456&view=auto
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml (added)
+++ xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml Tue Jul 28 09:08:05 2009
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="schema.xsd">
+ <item v3="10:44:32+02:00" v1="1" v2="x"/>
+ <item v3="11:44:32.000+03:00" v1="+1.0" v2="x"/>
+</root>
\ No newline at end of file
Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd?rev=798456&view=auto
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd (added)
+++ xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd Tue Jul 28 09:08:05 2009
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+ <xsd:element name="root">
+ <xsd:complexType>
+ <xsd:sequence minOccurs="1" maxOccurs="unbounded">
+ <xsd:element name="item">
+ <xsd:complexType>
+ <xsd:attribute name="v1" type="xsd:decimal"/>
+ <xsd:attribute name="v2" type="xsd:string"/>
+ <xsd:attribute name="v3" type="xsd:time"/>
+ </xsd:complexType>
+ </xsd:element>
+ </xsd:sequence>
+ </xsd:complexType>
+ <xsd:unique name="uniqueCons">
+ <xsd:selector xpath="item"/>
+ <xsd:field xpath="@v1"/>
+ <xsd:field xpath="@v2"/>
+ <xsd:field xpath="@v3"/>
+ </xsd:unique>
+ </xsd:element>
+</xsd:schema>
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]