Author: amassari
Date: Mon Aug 24 13:47:35 2009
New Revision: 807213
URL: http://svn.apache.org/viewvc?rev=807213&view=rev
Log:
Improved performance and reduced memory footprint of schema validation
involving large maxOccurs:
- the CMStateSet uses a sparsely allocated matrix to store the bits, resulting
in less memory usage and faster bitwise operations (when analyzing an
unallocated chunk, no operations are done); also, having moved the dynamic
buffer data members into a separate structure, the space used by two pointers
has been added to the cached bit fields, that is now 128 bits
- the DFA builder chooses the faster algorithm depending on the data being
analyzed.
The regression test for XERCESC-1051 now completes in 30 seconds instead of 80
Modified:
xerces/c/trunk/src/xercesc/util/XMLString.hpp
xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp
xerces/c/trunk/src/xercesc/validators/common/DFAContentModel.cpp
xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp
Modified: xerces/c/trunk/src/xercesc/util/XMLString.hpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/XMLString.hpp?rev=807213&r1=807212&r2=807213&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/XMLString.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/XMLString.hpp Mon Aug 24 13:47:35 2009
@@ -1532,19 +1532,14 @@
inline XMLSize_t XMLString::stringLen(const XMLCh* const src)
{
- if (src == 0 || *src == 0)
- {
+ if (src == 0)
return 0;
- }
- else
- {
- const XMLCh* pszTmp = src + 1;
- while (*pszTmp)
- ++pszTmp;
+ const XMLCh* pszTmp = src;
- return (pszTmp - src);
- }
+ while (*pszTmp++) ;
+
+ return (pszTmp - src - 1);
}
inline XMLCh* XMLString::replicate(const XMLCh* const toRep,
Modified: xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp?rev=807213&r1=807212&r2=807213&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp (original)
+++ xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp Mon Aug 24
13:47:35 2009
@@ -26,7 +26,7 @@
//
// This class is a specialized bitset class for the content model code of
// the validator. It assumes that its never called with two objects of
-// different bit counts, and that bit sets smaller than 64 bits are far
+// different bit counts, and that bit sets smaller than a threshold are far
// and away the most common. So it can be a lot more optimized than a general
// purpose utility bitset class
//
@@ -41,84 +41,112 @@
class CMStateSetEnumerator;
+#define CMSTATE_CACHED_INT32_SIZE 4
+
+#define CMSTATE_BITFIELD_CHUNK 1024
+#define CMSTATE_BITFIELD_INT32_SIZE (1024 / 32)
+
+struct CMDynamicBuffer
+{
+ // fArraySize
+ // This indicates the number of elements of the fBitArray vector
+ //
+ // fBitArray
+ // A vector of arrays of XMLInt32; each array is allocated on demand
+ // if a bit needs to be set in that range
+ //
+ // fMemoryManager
+ // The memory manager used to allocate and deallocate memory
+ //
+ XMLSize_t fArraySize;
+ XMLInt32** fBitArray;
+ MemoryManager* fMemoryManager;
+};
+
class CMStateSet : public XMemory
{
public :
// -----------------------------------------------------------------------
// Constructors and Destructor
// -----------------------------------------------------------------------
- CMStateSet( const unsigned int bitCount
+ CMStateSet( const XMLSize_t bitCount
, MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) :
fBitCount(bitCount)
- , fBitArray(0)
- , fMemoryManager(manager)
+ , fDynamicBuffer(0)
{
//
// See if we need to allocate the byte array or whether we can live
- // within the 64 bit high performance scheme.
+ // within the cached bit high performance scheme.
//
- if (fBitCount > 64)
+ if (fBitCount > (CMSTATE_CACHED_INT32_SIZE * 32))
{
- fArraySize = fBitCount / 32;
- if (fBitCount % 32)
- fArraySize++;
- fBitArray = (XMLInt32*)
fMemoryManager->allocate(fArraySize*sizeof(XMLInt32));
+ fDynamicBuffer =
(CMDynamicBuffer*)manager->allocate(sizeof(CMDynamicBuffer));
+ fDynamicBuffer->fMemoryManager = manager;
+ // allocate an array of vectors, each one containing
CMSTATE_BITFIELD_CHUNK bits
+ fDynamicBuffer->fArraySize = fBitCount / CMSTATE_BITFIELD_CHUNK;
+ if (fBitCount % CMSTATE_BITFIELD_CHUNK)
+ fDynamicBuffer->fArraySize++;
+ fDynamicBuffer->fBitArray = (XMLInt32**)
fDynamicBuffer->fMemoryManager->allocate(fDynamicBuffer->fArraySize*sizeof(XMLInt32*));
+ for(XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ fDynamicBuffer->fBitArray[index]=NULL;
}
else
{
- fArraySize = 2;
- fBitArray = fBits;
+ for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE;
index++)
+ fBits[index] = 0;
}
-
- // Init all the bits to zero
- zeroBits();
}
-
- /*
- * This method with the 'for' statement (commented out) cannot be made
inline
- * because the antiquated CC (CFront) compiler under HPUX 10.20 does not
allow
- * the 'for' statement inside any inline method. Unfortunately,
- * we have to support it. So instead, we use memcpy().
- */
-
CMStateSet(const CMStateSet& toCopy) :
XMemory(toCopy)
, fBitCount(toCopy.fBitCount)
- , fBitArray(0)
- , fMemoryManager(toCopy.fMemoryManager)
+ , fDynamicBuffer(0)
{
//
// See if we need to allocate the byte array or whether we can live
- // within the 64 bit high performance scheme.
+ // within the cahced bit high performance scheme.
//
- if (fBitCount > 64)
+ if (fBitCount > (CMSTATE_CACHED_INT32_SIZE * 32))
{
- fArraySize = fBitCount / 32;
- if (fBitCount % 32)
- fArraySize++;
- fBitArray = (XMLInt32*)
fMemoryManager->allocate(fArraySize*sizeof(XMLInt32));
+ fDynamicBuffer = (CMDynamicBuffer*)
toCopy.fDynamicBuffer->fMemoryManager->allocate(sizeof(CMDynamicBuffer));
+ fDynamicBuffer->fMemoryManager =
toCopy.fDynamicBuffer->fMemoryManager;
+ fDynamicBuffer->fArraySize = fBitCount / CMSTATE_BITFIELD_CHUNK;
+ if (fBitCount % CMSTATE_BITFIELD_CHUNK)
+ fDynamicBuffer->fArraySize++;
+ fDynamicBuffer->fBitArray = (XMLInt32**)
fDynamicBuffer->fMemoryManager->allocate(fDynamicBuffer->fArraySize*sizeof(XMLInt32*));
+ for(XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ {
+ if(toCopy.fDynamicBuffer->fBitArray[index]!=NULL)
+ {
+
fDynamicBuffer->fBitArray[index]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
* sizeof(XMLInt32));
+ memcpy((void *) fDynamicBuffer->fBitArray[index],
+ (const void *)
toCopy.fDynamicBuffer->fBitArray[index],
+ CMSTATE_BITFIELD_INT32_SIZE * sizeof(XMLInt32));
+ }
+ else
+ fDynamicBuffer->fBitArray[index]=NULL;
+ }
}
else
{
- fArraySize = 2;
- fBitArray = fBits;
+ memcpy((void *) fBits,
+ (const void *) toCopy.fBits,
+ CMSTATE_CACHED_INT32_SIZE * sizeof(XMLInt32));
}
-
-
- memcpy((void *) fBitArray,
- (const void *) toCopy.fBitArray,
- fArraySize * sizeof(XMLInt32));
-
- // for (unsigned int index = 0; index < fArraySize; index++)
- // fBitArray[index] = toCopy.fBitArray[index];
}
~CMStateSet()
{
- if (fBitArray!=fBits)
- fMemoryManager->deallocate(fBitArray);
+ if(fDynamicBuffer)
+ {
+ for(XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ {
+ if(fDynamicBuffer->fBitArray[index]!=NULL)
+
fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer->fBitArray[index]);
+ }
+ fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer);
+ }
}
@@ -127,8 +155,40 @@
// -----------------------------------------------------------------------
void operator|=(const CMStateSet& setToOr)
{
- for (unsigned int index = 0; index < fArraySize; index++)
- fBitArray[index] |= setToOr.fBitArray[index];
+ if(fDynamicBuffer==0)
+ {
+ for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE;
index++)
+ if(setToOr.fBits[index])
+ if(fBits[index])
+ fBits[index] |= setToOr.fBits[index];
+ else
+ fBits[index] = setToOr.fBits[index];
+ }
+ else
+ {
+ for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ if(setToOr.fDynamicBuffer->fBitArray[index]!=NULL)
+ {
+ // if we haven't allocated the subvector yet, allocate it
and copy
+ if(fDynamicBuffer->fBitArray[index]==NULL)
+ {
+
fDynamicBuffer->fBitArray[index]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
* sizeof(XMLInt32));
+ memcpy((void *) fDynamicBuffer->fBitArray[index],
+ (const void *)
setToOr.fDynamicBuffer->fBitArray[index],
+ CMSTATE_BITFIELD_INT32_SIZE * sizeof(XMLInt32));
+ }
+ else
+ {
+ // otherwise, merge them
+ for(XMLSize_t subIndex = 0; subIndex <
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+
if(setToOr.fDynamicBuffer->fBitArray[index][subIndex])
+ if(fDynamicBuffer->fBitArray[index][subIndex])
+ fDynamicBuffer->fBitArray[index][subIndex]
|= setToOr.fDynamicBuffer->fBitArray[index][subIndex];
+ else
+ fDynamicBuffer->fBitArray[index][subIndex]
= setToOr.fDynamicBuffer->fBitArray[index][subIndex];
+ }
+ }
+ }
}
bool operator==(const CMStateSet& setToCompare) const
@@ -136,10 +196,29 @@
if (fBitCount != setToCompare.fBitCount)
return false;
- for (unsigned int index = 0; index < fArraySize; index++)
+ if(fDynamicBuffer==0)
{
- if (fBitArray[index] != setToCompare.fBitArray[index])
- return false;
+ for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE;
index++)
+ {
+ if (fBits[index] != setToCompare.fBits[index])
+ return false;
+ }
+ }
+ else
+ {
+ for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ {
+ if(fDynamicBuffer->fBitArray[index]==NULL &&
setToCompare.fDynamicBuffer->fBitArray[index]==NULL)
+ continue;
+ else if(fDynamicBuffer->fBitArray[index]==NULL ||
setToCompare.fDynamicBuffer->fBitArray[index]==NULL) // the other should have
been empty too
+ return false;
+ else
+ {
+ for(XMLSize_t subIndex = 0; subIndex <
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+
if(fDynamicBuffer->fBitArray[index][subIndex]!=setToCompare.fDynamicBuffer->fBitArray[index][subIndex])
+ return false;
+ }
+ }
}
return true;
}
@@ -151,60 +230,207 @@
// They have to be the same size
if (fBitCount != srcSet.fBitCount)
- ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Bitset_NotEqualSize, fMemoryManager);
-
- for (unsigned int index = 0; index < fArraySize; index++)
- fBitArray[index] = srcSet.fBitArray[index];
+ if(fDynamicBuffer)
+ ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Bitset_NotEqualSize, fDynamicBuffer->fMemoryManager);
+ else
+ ThrowXML(RuntimeException, XMLExcepts::Bitset_NotEqualSize);
+ if(fDynamicBuffer==0)
+ {
+ for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE;
index++)
+ fBits[index] = srcSet.fBits[index];
+ }
+ else
+ {
+ for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ if(srcSet.fDynamicBuffer->fBitArray[index]==NULL)
+ {
+ // delete this subentry
+ if(fDynamicBuffer->fBitArray[index]!=NULL)
+ {
+
fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer->fBitArray[index]);
+ fDynamicBuffer->fBitArray[index]=NULL;
+ }
+ }
+ else
+ {
+ // if we haven't allocated the subvector yet, allocate it
and copy
+ if(fDynamicBuffer->fBitArray[index]==NULL)
+
fDynamicBuffer->fBitArray[index]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
* sizeof(XMLInt32));
+ memcpy((void *) fDynamicBuffer->fBitArray[index],
+ (const void *)
srcSet.fDynamicBuffer->fBitArray[index],
+ CMSTATE_BITFIELD_INT32_SIZE * sizeof(XMLInt32));
+ }
+ }
return *this;
}
+ XMLSize_t getBitCountInRange(XMLSize_t start, XMLSize_t end) const
+ {
+ XMLSize_t count = 0;
+ end /= 32;
+ if(fDynamicBuffer==0)
+ {
+ if(end > CMSTATE_CACHED_INT32_SIZE)
+ end = CMSTATE_CACHED_INT32_SIZE;
+ for (XMLSize_t index = start / 32; index < end; index++)
+ {
+ if (fBits[index] != 0)
+ for(int i=0;i<32;i++)
+ {
+ XMLInt32 mask=(1UL << i);
+ if(fBits[index] & mask)
+ count++;
+ }
+ }
+ }
+ else
+ {
+ if(end > fDynamicBuffer->fArraySize)
+ end = fDynamicBuffer->fArraySize;
+ for (XMLSize_t index = start / 32; index < end; index++)
+ {
+ if(fDynamicBuffer->fBitArray[index]==NULL)
+ continue;
+ for(XMLSize_t subIndex=0;subIndex <
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+ {
+ if (fDynamicBuffer->fBitArray[index][subIndex] != 0)
+ for(int i=0;i<32;i++)
+ {
+ XMLInt32 mask=(1UL << i);
+ if(fDynamicBuffer->fBitArray[index][subIndex] &
mask)
+ count++;
+ }
+ }
+ }
+ }
+ return count;
+ }
- bool getBit(const unsigned int bitToGet) const
+ bool getBit(const XMLSize_t bitToGet) const
{
if (bitToGet >= fBitCount)
- ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException,
XMLExcepts::Bitset_BadIndex, fMemoryManager);
+ if(fDynamicBuffer)
+ ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException,
XMLExcepts::Bitset_BadIndex, fDynamicBuffer->fMemoryManager);
+ else
+ ThrowXML(ArrayIndexOutOfBoundsException,
XMLExcepts::Bitset_BadIndex);
- const XMLInt32 mask = (0x1UL << (bitToGet % 32));
- const unsigned int byteOfs = bitToGet / 32;
// And access the right bit and byte
- return (fBitArray[byteOfs]!=0 && (fBitArray[byteOfs] & mask) != 0);
+ if(fDynamicBuffer==0)
+ {
+ const XMLInt32 mask = (0x1UL << (bitToGet % 32));
+ const XMLSize_t byteOfs = bitToGet / 32;
+ return (fBits[byteOfs]!=0 && (fBits[byteOfs] & mask) != 0);
+ }
+ else
+ {
+ const XMLSize_t vectorOfs = bitToGet / CMSTATE_BITFIELD_CHUNK;
+ if(fDynamicBuffer->fBitArray[vectorOfs]==NULL)
+ return false;
+ const XMLInt32 mask = (0x1UL << (bitToGet % 32));
+ const XMLSize_t byteOfs = (bitToGet % CMSTATE_BITFIELD_CHUNK) / 32;
+ return (fDynamicBuffer->fBitArray[vectorOfs][byteOfs]!=0 &&
(fDynamicBuffer->fBitArray[vectorOfs][byteOfs] & mask) != 0);
+ }
}
bool isEmpty() const
{
- for (unsigned int index = 0; index < fArraySize; index++)
+ if(fDynamicBuffer==0)
{
- if (fBitArray[index] != 0)
- return false;
+ for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE;
index++)
+ {
+ if (fBits[index] != 0)
+ return false;
+ }
+ }
+ else
+ {
+ for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ {
+ if(fDynamicBuffer->fBitArray[index]==NULL)
+ continue;
+ for(XMLSize_t subIndex=0;subIndex <
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+ {
+ if (fDynamicBuffer->fBitArray[index][subIndex] != 0)
+ return false;
+ }
+ }
}
return true;
}
- void setBit(const unsigned int bitToSet)
+ void setBit(const XMLSize_t bitToSet)
{
if (bitToSet >= fBitCount)
- ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException,
XMLExcepts::Bitset_BadIndex, fMemoryManager);
+ if(fDynamicBuffer)
+ ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException,
XMLExcepts::Bitset_BadIndex, fDynamicBuffer->fMemoryManager);
+ else
+ ThrowXML(ArrayIndexOutOfBoundsException,
XMLExcepts::Bitset_BadIndex);
const XMLInt32 mask = (0x1UL << (bitToSet % 32));
- const unsigned int byteOfs = bitToSet / 32;
// And access the right bit and byte
- fBitArray[byteOfs] &= ~mask;
- fBitArray[byteOfs] |= mask;
+ if(fDynamicBuffer==0)
+ {
+ const XMLSize_t byteOfs = bitToSet / 32;
+ fBits[byteOfs] &= ~mask;
+ fBits[byteOfs] |= mask;
+ }
+ else
+ {
+ const XMLSize_t vectorOfs = bitToSet / CMSTATE_BITFIELD_CHUNK;
+ if(fDynamicBuffer->fBitArray[vectorOfs]==NULL)
+ {
+
fDynamicBuffer->fBitArray[vectorOfs]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
* sizeof(XMLInt32));
+ for(XMLSize_t index=0;index < CMSTATE_BITFIELD_INT32_SIZE;
index++)
+ fDynamicBuffer->fBitArray[vectorOfs][index]=0;
+ }
+ const XMLSize_t byteOfs = (bitToSet % CMSTATE_BITFIELD_CHUNK) / 32;
+ fDynamicBuffer->fBitArray[vectorOfs][byteOfs] &= ~mask;
+ fDynamicBuffer->fBitArray[vectorOfs][byteOfs] |= mask;
+ }
}
void zeroBits()
{
- for (unsigned int index = 0; index < fArraySize; index++)
- fBitArray[index] = 0;
+ if(fDynamicBuffer==0)
+ {
+ for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE;
index++)
+ fBits[index] = 0;
+ }
+ else
+ {
+ for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize;
index++)
+ // delete this subentry
+ if(fDynamicBuffer->fBitArray[index]!=NULL)
+ {
+
fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer->fBitArray[index]);
+ fDynamicBuffer->fBitArray[index]=NULL;
+ }
+ }
}
XMLSize_t hashCode() const
{
XMLSize_t hash = 0;
- for (XMLSize_t index = 0; index<fArraySize; index++)
- hash = fBitArray[index] + hash * 31;
+ if(fDynamicBuffer==0)
+ {
+ for (XMLSize_t index = 0; index<CMSTATE_CACHED_INT32_SIZE; index++)
+ hash = fBits[index] + hash * 31;
+ }
+ else
+ {
+ for (XMLSize_t index = 0; index<fDynamicBuffer->fArraySize;
index++)
+ {
+ if(fDynamicBuffer->fBitArray[index]==NULL)
+ // simulates the iteration on the missing bits
+ for(XMLSize_t subIndex=0;subIndex <
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+ hash *= 31;
+ else
+ for(XMLSize_t subIndex=0;subIndex <
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+ hash = fDynamicBuffer->fBitArray[index][subIndex] +
hash * 31;
+ }
+ }
return hash;
}
@@ -222,26 +448,21 @@
// The count of bits that the outside world wants to support,
// so its the max bit index plus one.
//
- // fArraySize
- // If the bit count is > 64, then we use the fBitArray member to
- // store the bits, and this indicates its size in bytes. Otherwise
- // its value is meaningless and unset.
- //
// fBits
- // When the bit count is <= 64 (very common), these hold the bits.
- // Otherwise, the fBitArray member holds htem.
+ // When the bit count is less than a threshold (very common), these
hold the bits.
+ // Otherwise, the fDynamicBuffer member holds htem.
//
- // fBitArray
- // The array of bytes used when the bit count is > 64. It is
- // allocated as required.
+ // fDynamicBuffer
+ // If the bit count is greater than the threshold, then we allocate
this structure to
+ // store the bits, the length, and the memory manager to
allocate/deallocate
+ // the memory
+ //
// -----------------------------------------------------------------------
- unsigned int fBitCount;
- unsigned int fArraySize;
- XMLInt32 fBits[2];
- XMLInt32* fBitArray;
- MemoryManager* fMemoryManager;
+ XMLSize_t fBitCount;
+ XMLInt32 fBits[CMSTATE_CACHED_INT32_SIZE];
+ CMDynamicBuffer* fDynamicBuffer;
- friend class CMStateSetEnumerator ;
+ friend class CMStateSetEnumerator;
};
class CMStateSetEnumerator : public XMemory
@@ -249,9 +470,8 @@
public:
CMStateSetEnumerator(const CMStateSet* toEnum) :
fToEnum(toEnum),
- fIndexCount((unsigned int)-1),
- fLastValue(0),
- fByteArrayCursor(0)
+ fIndexCount((XMLSize_t)-1),
+ fLastValue(0)
{
findNext();
}
@@ -269,7 +489,7 @@
if(fLastValue & mask)
{
fLastValue &= ~mask;
- unsigned int retVal=fIndexCount+i;
+ unsigned int retVal=(unsigned int)fIndexCount+i;
if(fLastValue==0)
findNext();
return retVal;
@@ -281,22 +501,43 @@
private:
void findNext()
{
- unsigned int nOffset=((fIndexCount==(unsigned
int)-1)?0:(fIndexCount/32)+1), i;
- for(i=nOffset;i<fToEnum->fArraySize;i++)
+ if(fToEnum->fDynamicBuffer==0)
+ {
+ XMLSize_t
nOffset=((fIndexCount==(XMLSize_t)-1)?0:(fIndexCount/32)+1);
+ for(XMLSize_t
index=nOffset;index<CMSTATE_CACHED_INT32_SIZE;index++)
+ {
+ if(fToEnum->fBits[index]!=0)
+ {
+ fIndexCount=index*32;
+ fLastValue=fToEnum->fBits[index];
+ return;
+ }
+ }
+ }
+ else
{
- if(fToEnum->fBitArray[i]!=0)
+ XMLSize_t
nOffset=((fIndexCount==(XMLSize_t)-1)?0:(fIndexCount/CMSTATE_BITFIELD_CHUNK));
+ XMLSize_t nSubOffset=((fIndexCount==(XMLSize_t)-1)?0:((fIndexCount
% CMSTATE_BITFIELD_CHUNK) /32)+1);
+ for (XMLSize_t index = nOffset;
index<fToEnum->fDynamicBuffer->fArraySize; index++)
{
- fIndexCount=i*32;
- fLastValue=fToEnum->fBitArray[i];
- break;
+ if(fToEnum->fDynamicBuffer->fBitArray[index]!=NULL)
+ {
+ for(XMLSize_t subIndex=nSubOffset;subIndex <
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+
if(fToEnum->fDynamicBuffer->fBitArray[index][subIndex]!=0)
+ {
+ fIndexCount=index*CMSTATE_BITFIELD_CHUNK +
subIndex*32;
+
fLastValue=fToEnum->fDynamicBuffer->fBitArray[index][subIndex];
+ return;
+ }
+ }
+ nSubOffset = 0; // next chunks will be processed from the
beginning
}
}
}
- const CMStateSet* fToEnum;
- unsigned int fIndexCount;
- XMLInt32 fLastValue;
- unsigned int fByteArrayCursor;
+ const CMStateSet* fToEnum;
+ XMLSize_t fIndexCount;
+ XMLInt32 fLastValue;
};
XERCES_CPP_NAMESPACE_END
Modified: xerces/c/trunk/src/xercesc/validators/common/DFAContentModel.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/validators/common/DFAContentModel.cpp?rev=807213&r1=807212&r2=807213&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/validators/common/DFAContentModel.cpp (original)
+++ xerces/c/trunk/src/xercesc/validators/common/DFAContentModel.cpp Mon Aug 24
13:47:35 2009
@@ -40,6 +40,7 @@
#include <xercesc/validators/schema/XercesElementWildcard.hpp>
#include <xercesc/util/RefHashTableOf.hpp>
#include <xercesc/util/XMLInteger.hpp>
+#include <math.h>
XERCES_CPP_NAMESPACE_BEGIN
@@ -959,9 +960,11 @@
// Bump up the unmarked state count, marking this state done
unmarkedState++;
+#ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH
// Optimization(Jan, 2001)
unsigned int sorterIndex = 0;
// Optimization(Jan, 2001)
+#endif
// Loop through each possible input symbol in the element map
for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
@@ -1034,30 +1037,68 @@
unsigned int fNumItems=fLeafIndexes[0];
if(fNumItems!=0)
{
- CMStateSetEnumerator enumBits(setT);
- while(enumBits.hasMoreElements())
+ // The algorithm requires finding the leaf that is present
both in the bitfield of the current state, and in the
+ // list of places where the currently tested item can appear.
When this occurs, the follow list of this parent item
+ // is added to the bitfield representing the next state.
+ // Both the bitfield and the list of places are sorted, so we
can analyze them in two ways; either iterating over the
+ // parent items, testing the bitfield for the existence of the
parent (N times a constant Tb), or by iterating over the
+ // bitfield (restricted to the range of the sorted list of
places), using a binary search to locate the leaf in the
+ // sorted list of places (M times log(N) testing operations Ts)
+ // Assuming that the time to test a bit is roughly the same of
the time needed to compute the average of two integers,
+ // plus a couple of comparisons and additions, we compare N
agains M*log(N) to decide which algorithm should be faster given
+ // the two sets
+ if(fNumItems <= setT->getBitCountInRange(fLeafIndexes[1],
fLeafIndexes[fNumItems])*log((float)fNumItems))
{
- unsigned int bitIndex=enumBits.nextElement();
- // Check if this leaf index (DFA position) is in the
current set
- // (using binary search: the indexes are sorted)
- unsigned int first=1,last=fNumItems,i;
- while(first<=last)
- {
- i=(first+last)/2;
- if(fLeafIndexes[i]>bitIndex)
- last=i-1;
- else if(fLeafIndexes[i]<bitIndex)
- first=i+1;
- else
+ for(unsigned int i=1; i<=fNumItems; ++i)
+ if(setT->getBit(fLeafIndexes[i]))
{
- // found!
//
// If this leaf is the current input symbol, then
we
// want to add its follow list to the set of
states to
// transition to from the current state.
//
- *newSet |= *fFollowList[bitIndex];
+ *newSet |= *fFollowList[ fLeafIndexes[i] ];
+ }
+ }
+ else
+ {
+ // Further optimization: given that the bitfield
enumerator returns the numbers in order,
+ // every time we raise the lower marker we know it will
true also for the next bits, so
+ // the next binary search will not start from 1 but from
this index
+ unsigned int lowIndex = 1;
+ CMStateSetEnumerator enumBits(setT);
+ while(enumBits.hasMoreElements())
+ {
+ unsigned int bitIndex=enumBits.nextElement();
+ // if this leaf has an index less than the first index
in the sorted list of places,
+ // it cannot be here, so avoid starting the binary
search
+ if(bitIndex < fLeafIndexes[1])
+ continue;
+ // if this leaf is greater than the last index in the
sorted list of places,
+ // nothing can be found from now on, so get out of here
+ else if(bitIndex > fLeafIndexes[fNumItems])
break;
+
+ // Check if this leaf index (DFA position) is in the
current set
+ // (using binary search: the indexes are sorted)
+ unsigned int first=lowIndex,last=fNumItems,i;
+ while(first<=last)
+ {
+ i=(first+last)/2;
+ if(fLeafIndexes[i]>bitIndex)
+ last=i-1;
+ else if(fLeafIndexes[i]<bitIndex)
+ lowIndex=first=i+1;
+ else
+ {
+ //
+ // If this leaf is the current input symbol,
then we
+ // want to add its follow list to the set of
states to
+ // transition to from the current state.
+ //
+ *newSet |= *fFollowList[bitIndex];
+ break;
+ }
}
}
}
Modified: xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp?rev=807213&r1=807212&r2=807213&view=diff
==============================================================================
--- xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp (original)
+++ xerces/c/trunk/tests/src/DOM/DOMTest/DTest.cpp Mon Aug 24 13:47:35 2009
@@ -39,6 +39,7 @@
#include <xercesc/dom/DOMLSParserFilter.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
+#include <xercesc/validators/common/CMStateSet.hpp>
#define UNUSED(x) { if(x!=0){} }
@@ -1751,7 +1752,7 @@
// EXCEPTIONSTEST(charData->deleteData(2, -1),
DOMException::INDEX_SIZE_ERR, OK, 102 );
EXCEPTIONSTEST(charData->deleteData(100, 5), DOMException::INDEX_SIZE_ERR,
OK,103 );
-//can't set negative unsigned int in c++ compiler
+//can't set negative XMLSize_t in c++ compiler
// EXCEPTIONSTEST(charData->insertData(-1, "Stuff inserted"),
DOMException::INDEX_SIZE_ERR, OK, 104 );
XMLString::transcode("Stuff inserted", tempStr, 3999);
@@ -5608,5 +5609,110 @@
XMLString::removeWS(tempStr);
TEST_STRING(tempStr, tempStr2);
+ if(XMLString::stringLen((XMLCh*)0)!=0)
+ {
+ fprintf(stderr, "strLen test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+ if(XMLString::stringLen(XMLUni::fgZeroLenString)!=0)
+ {
+ fprintf(stderr, "strLen test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+ XMLCh one[2]={ chLatin_A, chNull };
+ if(XMLString::stringLen(one)!=1)
+ {
+ fprintf(stderr, "strLen test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+ XMLCh two[3]={ chLatin_A, chLatin_B, chNull };
+ if(XMLString::stringLen(two)!=2)
+ {
+ fprintf(stderr, "strLen test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+
+ // this tests the cached bit storage
+ CMStateSet setT(60);
+ setT.setBit(8);
+ setT.setBit(52);
+ setT.setBit(34);
+
+ if(!setT.getBit(8) || !setT.getBit(52) || !setT.getBit(34))
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+
+ CMStateSetEnumerator enumT(&setT);
+ if(!enumT.hasMoreElements() || enumT.nextElement()!=8)
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+ if(!enumT.hasMoreElements() || enumT.nextElement()!=34)
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+ if(!enumT.hasMoreElements() || enumT.nextElement()!=52)
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+ if(enumT.hasMoreElements())
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+
+ // this tests the dynamic bit storage
+ CMStateSet setT2(3 * CMSTATE_BITFIELD_CHUNK);
+ setT2.setBit(0); // first block, begin
+ setT2.setBit(CMSTATE_BITFIELD_CHUNK/2 -1); // first block, middle
+ setT2.setBit(CMSTATE_BITFIELD_CHUNK/2); // first block, middle
+ setT2.setBit(CMSTATE_BITFIELD_CHUNK/2 +1); // first block, middle
+ setT2.setBit(CMSTATE_BITFIELD_CHUNK-1); // first block, end
+ setT2.setBit(2*CMSTATE_BITFIELD_CHUNK); // last block, begin
+ setT2.setBit(2*CMSTATE_BITFIELD_CHUNK + CMSTATE_BITFIELD_CHUNK/2 -1); //
last block, middle
+ setT2.setBit(2*CMSTATE_BITFIELD_CHUNK + CMSTATE_BITFIELD_CHUNK/2); // last
block, middle
+ setT2.setBit(2*CMSTATE_BITFIELD_CHUNK + CMSTATE_BITFIELD_CHUNK/2 +1); //
last block, middle
+ setT2.setBit(3*CMSTATE_BITFIELD_CHUNK-1); // last block, end
+
+ // test just a few ones
+ if(!setT2.getBit(0) || !setT2.getBit(CMSTATE_BITFIELD_CHUNK-1) ||
!setT2.getBit(2*CMSTATE_BITFIELD_CHUNK + CMSTATE_BITFIELD_CHUNK/2 +1))
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+
+ if(setT2.getBitCountInRange(0, 3*CMSTATE_BITFIELD_CHUNK)!=10)
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+ CMStateSetEnumerator enumT2(&setT2);
+ XMLSize_t count=0;
+ while(enumT2.hasMoreElements())
+ {
+ count++;
+ enumT2.nextElement();
+ }
+ if(count!=10)
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
+
+ // this tests the hash generator
+ CMStateSet setT3(3 * CMSTATE_BITFIELD_CHUNK), setT4(3 *
CMSTATE_BITFIELD_CHUNK);
+ // these two sets will have a single bit set at the beginning of a chunk
+ setT3.setBit(0);
+ setT4.setBit(CMSTATE_BITFIELD_CHUNK);
+ if(setT3.hashCode()==setT4.hashCode())
+ {
+ fprintf(stderr, "bitset test failed at line %i\n", __LINE__);
+ OK = false;
+ }
return OK;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]