Author: amassari
Date: Tue Jul 28 09:08:05 2009
New Revision: 798456

URL: http://svn.apache.org/viewvc?rev=798456&view=rev
Log:
Improve scalability of identity checking by using a hash table whose hash value 
is computed on the canonical representations of the values (XERCESC-1878)

Added:
    xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/
    xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml
    xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd
    xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/valid.xml
Modified:
    xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp
    xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp
    xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp
    xerces/c/trunk/tests/src/XSTSHarness/regression/Xerces.testSet

Modified: xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp?rev=798456&r1=798455&r2=798456&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp 
(original)
+++ xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.cpp Tue 
Jul 28 09:08:05 2009
@@ -33,6 +33,98 @@
 
 XERCES_CPP_NAMESPACE_BEGIN
 
+//
+// ---------------------------------------------------------------------------
+// ICValueHasher: the hasher for identity constraints values
+// ---------------------------------------------------------------------------
+XMLSize_t ICValueHasher::getHashVal(const void* key, XMLSize_t mod) const
+{
+    const FieldValueMap* valueMap=(const FieldValueMap*)key;
+    XMLSize_t hashVal = 0;
+
+    XMLSize_t size = valueMap->size();
+    for (XMLSize_t j=0; j<size; j++) {
+        DatatypeValidator* const dv = valueMap->getDatatypeValidatorAt(j);
+        const XMLCh* const val = valueMap->getValueAt(j);
+        const XMLCh* canonVal = (dv && 
val)?dv->getCanonicalRepresentation(val, fMemoryManager):0;
+        if(canonVal)
+        {
+            hashVal += XMLString::hash(canonVal, mod);
+            fMemoryManager->deallocate((void*)canonVal);
+        }
+        else if(val)
+            hashVal += XMLString::hash(val, mod);
+    }
+
+    return hashVal % mod;
+}
+
+bool ICValueHasher::equals(const void *const key1, const void *const key2) 
const
+{
+    const FieldValueMap* left=(const FieldValueMap*)key1;
+    const FieldValueMap* right=(const FieldValueMap*)key2;
+
+    XMLSize_t lSize = left->size();
+    XMLSize_t rSize = right->size();
+    if (lSize == rSize) 
+    {
+        bool matchFound = true;
+
+        for (XMLSize_t j=0; j<rSize; j++) {
+            if (!isDuplicateOf(left->getDatatypeValidatorAt(j), 
left->getValueAt(j),
+                               right->getDatatypeValidatorAt(j), 
right->getValueAt(j))) {
+                matchFound = false;
+                break;
+            }
+        }
+
+        if (matchFound) { // found it
+            return true;
+        }
+    }
+    return false;
+}
+
+bool ICValueHasher::isDuplicateOf(DatatypeValidator* const dv1, const XMLCh* 
const val1,
+                                  DatatypeValidator* const dv2, const XMLCh* 
const val2) const 
+{
+
+    // if either validator's null, fall back on string comparison
+    if(!dv1 || !dv2) {
+        return (XMLString::equals(val1, val2));
+    }
+
+    bool val1IsEmpty = (val1==0 || *val1==0);
+    bool val2IsEmpty = (val2==0 || *val2==0);
+
+    if (val1IsEmpty && val2IsEmpty) {
+
+        if (dv1 == dv2) {
+            return true;
+        }
+
+        return false;
+    }
+
+    if (val1IsEmpty || val2IsEmpty) {
+        return false;
+    }
+
+    // find the common ancestor, if there is one
+    DatatypeValidator* tempVal1 = dv1;
+    while(tempVal1)
+    {
+        DatatypeValidator* tempVal2 = dv2;
+        for(; tempVal2 != NULL && tempVal2 != tempVal1; tempVal2 = 
tempVal2->getBaseValidator()) ;
+        if (tempVal2) 
+            return ((tempVal2->compare(val1, val2, fMemoryManager)) == 0);
+        tempVal1=tempVal1->getBaseValidator();
+    }
+
+    // if we're here it means the types weren't related. They are different:
+    return false;
+}
+
 // ---------------------------------------------------------------------------
 //  ValueStore: Constructors and Destructor
 // ---------------------------------------------------------------------------
@@ -98,10 +190,11 @@
 
         // store values
         if (!fValueTuples) {
-            fValueTuples = new (fMemoryManager) RefVectorOf<FieldValueMap>(4, 
true, fMemoryManager);
+            fValueTuples = new (fMemoryManager) RefHashTableOf<FieldValueMap, 
ICValueHasher>(107, true, ICValueHasher(fMemoryManager), fMemoryManager);
         }
 
-        fValueTuples->addElement(new (fMemoryManager) FieldValueMap(fValues));
+        FieldValueMap* pICItem = new (fMemoryManager) FieldValueMap(fValues);
+        fValueTuples->put(pICItem, pICItem);
     }
 }
 
@@ -111,19 +204,19 @@
         return;
     }
 
-    XMLSize_t tupleSize = other->fValueTuples->size();
-
-    for (XMLSize_t i=0; i<tupleSize; i++) {
-
-           FieldValueMap* valueMap = other->fValueTuples->elementAt(i);
+    RefHashTableOfEnumerator<FieldValueMap, ICValueHasher> 
iter(other->fValueTuples, false, fMemoryManager);
+    while(iter.hasMoreElements())
+    {
+        FieldValueMap& valueMap = iter.nextElement();
 
-        if (!contains(valueMap)) {
+        if (!contains(&valueMap)) {
 
             if (!fValueTuples) {
-                fValueTuples = new (fMemoryManager) 
RefVectorOf<FieldValueMap>(4, true, fMemoryManager);
+                fValueTuples = new (fMemoryManager) 
RefHashTableOf<FieldValueMap, ICValueHasher>(107, true, 
ICValueHasher(fMemoryManager), fMemoryManager);
             }
 
-            fValueTuples->addElement(new (fMemoryManager) 
FieldValueMap(*valueMap));
+            FieldValueMap* pICItem = new (fMemoryManager) 
FieldValueMap(valueMap);
+            fValueTuples->put(pICItem, pICItem);
         }
     }
 }
@@ -164,73 +257,9 @@
 
 bool ValueStore::contains(const FieldValueMap* const other) {
 
-    if (fValueTuples) {
-
-        XMLSize_t otherSize = other->size();
-        XMLSize_t tupleSize = fValueTuples->size();
-
-        for (XMLSize_t i=0; i<tupleSize; i++) {
-
-            FieldValueMap* valueMap = fValueTuples->elementAt(i);
-
-            if (otherSize == valueMap->size()) {
-
-                bool matchFound = true;
-
-                for (XMLSize_t j=0; j<otherSize; j++) {
-                    if (!isDuplicateOf(valueMap->getDatatypeValidatorAt(j), 
valueMap->getValueAt(j),
-                                       other->getDatatypeValidatorAt(j), 
other->getValueAt(j))) {
-                        matchFound = false;
-                        break;
-                    }
-                }
-
-                if (matchFound) { // found it
-                    return true;
-                }
-            }
-        }
-    }
-
-    return false;
-}
-
-bool ValueStore::isDuplicateOf(DatatypeValidator* const dv1, const XMLCh* 
const val1,
-                               DatatypeValidator* const dv2, const XMLCh* 
const val2) {
-
-    // if either validator's null, fall back on string comparison
-    if(!dv1 || !dv2) {
-        return (XMLString::equals(val1, val2));
-    }
-
-    bool val1IsEmpty = (val1==0 || *val1==0);
-    bool val2IsEmpty = (val2==0 || *val2==0);
-
-    if (val1IsEmpty && val2IsEmpty) {
-
-        if (dv1 == dv2) {
-            return true;
-        }
-
-        return false;
-    }
-
-    if (val1IsEmpty || val2IsEmpty) {
-        return false;
-    }
-
-    // find the common ancestor, if there is one
-    DatatypeValidator* tempVal1 = dv1;
-    while(tempVal1)
-    {
-        DatatypeValidator* tempVal2 = dv2;
-        for(; tempVal2 != NULL && tempVal2 != tempVal1; tempVal2 = 
tempVal2->getBaseValidator()) ;
-        if (tempVal2) 
-            return ((tempVal2->compare(val1, val2, fMemoryManager)) == 0);
-        tempVal1=tempVal1->getBaseValidator();
-    }
+    if (fValueTuples)
+        return fValueTuples->get(other)!=0;
 
-    // if we're here it means the types weren't related. They are different:
     return false;
 }
 
@@ -239,7 +268,7 @@
     fValuesCount=0;
     fValues.clear();
     if(fValueTuples)
-        fValueTuples->removeAllElements();
+        fValueTuples->removeAll();
 }
 
 // ---------------------------------------------------------------------------
@@ -263,16 +292,18 @@
             return;
         }
 
-        XMLSize_t count = (fValueTuples) ? fValueTuples->size() : 0;
-
-        for (XMLSize_t i = 0; i < count; i++) {
-
-            FieldValueMap* valueMap = fValueTuples->elementAt(i);
+        if(fValueTuples)
+        {
+            RefHashTableOfEnumerator<FieldValueMap, ICValueHasher> 
iter(fValueTuples, false, fMemoryManager);
+            while(iter.hasMoreElements())
+            {
+                FieldValueMap& valueMap = iter.nextElement();
 
-            if (!keyValueStore->contains(valueMap) && fDoReportError) {
+                if (!keyValueStore->contains(&valueMap) && fDoReportError) {
 
-                fScanner->getValidator()->emitError(XMLValid::IC_KeyNotFound,
-                    fIdentityConstraint->getElementName());
+                    
fScanner->getValidator()->emitError(XMLValid::IC_KeyNotFound,
+                        fIdentityConstraint->getElementName());
+                }
             }
         }
     }

Modified: xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp?rev=798456&r1=798455&r2=798456&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp 
(original)
+++ xerces/c/trunk/src/xercesc/validators/schema/identity/ValueStore.hpp Tue 
Jul 28 09:08:05 2009
@@ -32,7 +32,7 @@
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/validators/schema/identity/FieldValueMap.hpp>
-#include <xercesc/util/RefVectorOf.hpp>
+#include <xercesc/util/RefHashTableOf.hpp>
 
 XERCES_CPP_NAMESPACE_BEGIN
 
@@ -44,6 +44,30 @@
 class XMLScanner;
 class ValueStoreCache;
 
+struct ICValueHasher
+{
+    ICValueHasher(MemoryManager* const manager) : fMemoryManager(manager) {}
+
+    XMLSize_t getHashVal(const void* key, XMLSize_t mod) const;
+    bool equals(const void *const key1, const void *const key2) const;
+
+    // -----------------------------------------------------------------------
+    //  Helper methods
+    // -----------------------------------------------------------------------
+    /**
+      * Returns whether a field associated <DatatypeValidator, String> value
+      * is a duplicate of another associated value.
+      * It is a duplicate only if either of these conditions are true:
+      * - The Datatypes are the same or related by derivation and the values
+      *   are in the same valuespace.
+      * - The datatypes are unrelated and the values are Stringwise identical.
+      */
+    bool isDuplicateOf(DatatypeValidator* const dv1, const XMLCh* const val1,
+                       DatatypeValidator* const dv2, const XMLCh* const val2) 
const;
+
+
+    MemoryManager* fMemoryManager;
+};
 
 class VALIDATORS_EXPORT ValueStore : public XMemory
 {
@@ -93,28 +117,13 @@
     ValueStore& operator= (const ValueStore& other);
 
     // -----------------------------------------------------------------------
-    //  Helper methods
-    // -----------------------------------------------------------------------
-    /**
-      * Returns whether a field associated <DatatypeValidator, String> value
-      * is a duplicate of another associated value.
-      * It is a duplicate only if either of these conditions are true:
-      * - The Datatypes are the same or related by derivation and the values
-      *   are in the same valuespace.
-      * - The datatypes are unrelated and the values are Stringwise identical.
-      */
-    bool isDuplicateOf(DatatypeValidator* const dv1, const XMLCh* const val1,
-                       DatatypeValidator* const dv2, const XMLCh* const val2);
-
-
-    // -----------------------------------------------------------------------
     //  Data
     // -----------------------------------------------------------------------
     bool                        fDoReportError;
     XMLSize_t                   fValuesCount;
     IdentityConstraint*         fIdentityConstraint;
     FieldValueMap               fValues;
-    RefVectorOf<FieldValueMap>* fValueTuples;
+    RefHashTableOf<FieldValueMap, ICValueHasher>* fValueTuples;
     XMLScanner*                 fScanner; // for error reporting - REVISIT
     MemoryManager*              fMemoryManager;
 };

Modified: xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp?rev=798456&r1=798455&r2=798456&view=diff
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp (original)
+++ xerces/c/trunk/tests/src/XSTSHarness/XSTSHarnessHandlers.cpp Tue Jul 28 
09:08:05 2009
@@ -196,7 +196,7 @@
             try
             {
                 fErrorHandler.resetErrors();
-                for(unsigned int i=0;i<fCurrentTest.fXSDNames.size();i++)
+                for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
                 {
                     Grammar* 
grammar=fParser->loadGrammar(fCurrentTest.fXSDNames.elementAt(i)->getURLText(), 
Grammar::SchemaGrammarType, true);
                     success=(success && (grammar!=NULL));
@@ -226,7 +226,7 @@
                 // skip the rest of the group, as we had problems with the 
schema itself
                 fCurrentTest.fSkipped=true;
                 fFailures++;
-                for(unsigned int i=0;i<fCurrentTest.fXSDNames.size();i++)
+                for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
                     printFile(*fCurrentTest.fXSDNames.elementAt(i));
             }
             else
@@ -239,7 +239,7 @@
                         fCurrentTest.fSkipped=true;
                         fFailures++;
                         XERCES_STD_QUALIFIER cout << "Test " << 
StrX(fCurrentTest.fTestName) << " succeeded but was expected to fail" << 
XERCES_STD_QUALIFIER endl;
-                        for(unsigned int 
i=0;i<fCurrentTest.fXSDNames.size();i++)
+                        for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
                             printFile(*fCurrentTest.fXSDNames.elementAt(i));
                     }
                 }
@@ -252,7 +252,7 @@
                         fFailures++;
                         XERCES_STD_QUALIFIER cout << "Test " << 
StrX(fCurrentTest.fTestName) << " failed but was expected to pass" << 
XERCES_STD_QUALIFIER endl;
                         XERCES_STD_QUALIFIER cout << "Reported error: " << 
StrX(fErrorHandler.getErrorText()) << XERCES_STD_QUALIFIER endl;
-                        for(unsigned int 
i=0;i<fCurrentTest.fXSDNames.size();i++)
+                        for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
                             printFile(*fCurrentTest.fXSDNames.elementAt(i));
                     }
                 }
@@ -295,7 +295,7 @@
             if(fatalFailure)
             {
                 fFailures++;
-                for(unsigned int i=0;i<fCurrentTest.fXSDNames.size();i++)
+                for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
                     printFile(*fCurrentTest.fXSDNames.elementAt(i));
                 printFile(fCurrentTest.fXMLName);
             }
@@ -307,7 +307,7 @@
                     {
                         fFailures++;
                         XERCES_STD_QUALIFIER cout << "Test " << 
StrX(fCurrentTest.fTestName) << " succeeded but was expected to fail" << 
XERCES_STD_QUALIFIER endl;
-                        for(unsigned int 
i=0;i<fCurrentTest.fXSDNames.size();i++)
+                        for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
                             printFile(*fCurrentTest.fXSDNames.elementAt(i));
                         printFile(fCurrentTest.fXMLName);
                     }
@@ -319,7 +319,7 @@
                         fFailures++;
                         XERCES_STD_QUALIFIER cout << "Test " << 
StrX(fCurrentTest.fTestName) << " failed but was expected to pass" << 
XERCES_STD_QUALIFIER endl;
                         XERCES_STD_QUALIFIER cout << "Reported error: " << 
StrX(fErrorHandler.getErrorText()) << XERCES_STD_QUALIFIER endl;
-                        for(unsigned int 
i=0;i<fCurrentTest.fXSDNames.size();i++)
+                        for(XMLSize_t i=0;i<fCurrentTest.fXSDNames.size();i++)
                             printFile(*fCurrentTest.fXSDNames.elementAt(i));
                         printFile(fCurrentTest.fXMLName);
                     }

Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml?rev=798456&view=auto
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml 
(added)
+++ xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/invalid.xml 
Tue Jul 28 09:08:05 2009
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<root xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:noNamespaceSchemaLocation="schema.xsd">
+       <item v3="10:44:32+02:00" v1="1" v2="x"/>
+       <item v3="11:44:32.000+03:00" v1="+1.0" v2="x"/>
+</root>
\ No newline at end of file

Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd?rev=798456&view=auto
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd 
(added)
+++ xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-1878/schema.xsd Tue 
Jul 28 09:08:05 2009
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+    <xsd:element name="root">
+        <xsd:complexType>
+            <xsd:sequence minOccurs="1" maxOccurs="unbounded">
+                <xsd:element name="item">
+                    <xsd:complexType>
+                        <xsd:attribute name="v1" type="xsd:decimal"/>
+                        <xsd:attribute name="v2" type="xsd:string"/>
+                        <xsd:attribute name="v3" type="xsd:time"/>
+                    </xsd:complexType>
+                </xsd:element>
+            </xsd:sequence>
+        </xsd:complexType>
+        <xsd:unique name="uniqueCons">
+            <xsd:selector xpath="item"/>
+            <xsd:field xpath="@v1"/>
+            <xsd:field xpath="@v2"/>
+            <xsd:field xpath="@v3"/>
+        </xsd:unique>
+    </xsd:element>
+</xsd:schema>
\ No newline at end of file



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to