Author: amassari
Date: Wed Aug 26 16:01:20 2009
New Revision: 808072

URL: http://svn.apache.org/viewvc?rev=808072&view=rev
Log:
Use SSE2 instructions to speed up bit operations used in schema validation with 
large maxOccurs

Modified:
    xerces/c/trunk/configure.ac
    xerces/c/trunk/src/xercesc/util/PlatformUtils.cpp
    xerces/c/trunk/src/xercesc/util/PlatformUtils.hpp
    xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.hpp.in
    xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.msvc.hpp
    xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp

Modified: xerces/c/trunk/configure.ac
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/configure.ac?rev=808072&r1=808071&r2=808072&view=diff
==============================================================================
--- xerces/c/trunk/configure.ac (original)
+++ xerces/c/trunk/configure.ac Wed Aug 26 16:01:20 2009
@@ -99,6 +99,7 @@
                   unistd.h wchar.h wctype.h \
                   CoreServices/CoreServices.h \
                   endian.h machine/endian.h arpa/nameser_compat.h \
+                  intrin.h emmintrin.h cpuid.h \
                  ])
 
 # Checks for typedefs, structures, and compiler characteristics.
@@ -228,16 +229,22 @@
 # Allow the user to specify the pkgconfig directory.
 #
 AC_ARG_WITH(pkgconfigdir,
-       AC_HELP_STRING([--with-pkgconfigdir=DIR],
-       [Specify location of pkgconfig dir (default is libdir/pkgconfig)]),
+       AC_HELP_STRING([--with-pkgconfigdir=DIR],[Specify location of pkgconfig 
dir (default is libdir/pkgconfig)]),
        [pkgconfigdir=${withval}],
        [pkgconfigdir='${libdir}/pkgconfig'])
 
 AC_SUBST([pkgconfigdir])
 
+# Allow the user to disable the SSE2 support
+#
+AC_ARG_ENABLE(sse2, 
+    AC_HELP_STRING([--disable-sse2],[disable SSE2 optimizations]), 
+    [have_sse2=${enableval}], 
+    [have_sse2=yes])
+
 ######################################################
 # Define some namespace-protected macros for use in the
-# publicly visible XercesConfig.h file.
+# publicly visible Xerces_autoconf_config.h file.
 ######################################################
 
 AC_DEFINE([XERCES_AUTOCONF], 1, [Define to true if autoconf is used in this 
configuration])
@@ -246,6 +253,10 @@
        AC_DEFINE([XERCES_HAVE_SYS_TYPES_H], 1, [Define to 1 if we have 
sys/types.h]))
 AS_IF([test x$ac_cv_header_inttypes_h = xyes],
        AC_DEFINE([XERCES_HAVE_INTTYPES_H],    1, [Define to 1 if we have 
inttypes.h]))
+AS_IF([test x$ac_cv_header_intrin_h = xyes],
+       AC_DEFINE([XERCES_HAVE_INTRIN_H], 1, [Define to 1 if we have intrin.h]))
+AS_IF([test x$ac_cv_header_emmintrin_h = xyes],
+       AC_DEFINE([XERCES_HAVE_EMMINTRIN_H], 1, [Define to 1 if we have 
emmintrin.h]))
 
 case $host in
 *-*-msdos* | *-*-mingw32* | *-*-cygwin* | *-*-windows* )
@@ -307,6 +318,89 @@
                     ]
                  )
 
+if test "$have_sse2" = "yes"; then
+    save_CXXFLAGS="$CXXFLAGS"
+    CXXFLAGS="$CXXFLAGS -msse2"
+       AC_MSG_CHECKING([whether we need to add -msse2])
+       AC_COMPILE_IFELSE(  [AC_LANG_PROGRAM([[#include <emmintrin.h>]], 
[[__m128i one;]])],
+                                               [msse2_ok=yes],
+                                               [msse2_ok=no]
+                                        )
+    AC_MSG_RESULT($msse2_ok)
+    if test x"$msse2_ok" = xno; then
+        CXXFLAGS="$save_CXXFLAGS"
+    fi
+
+    save_CXXFLAGS="$CXXFLAGS"
+    CXXFLAGS="$CXXFLAGS -xarch=sse2"
+       AC_MSG_CHECKING([whether we need to add -xarch=sse2])
+       AC_COMPILE_IFELSE(  [AC_LANG_PROGRAM([[#include <emmintrin.h>]], 
[[__m128i one;]])],
+                                               [xarchsse2_ok=yes],
+                                               [xarchsse2_ok=no]
+                                        )
+    AC_MSG_RESULT($xarchsse2_ok)
+    if test x"$xarchsse2_ok" = xno; then
+        CXXFLAGS="$save_CXXFLAGS"
+    fi
+
+       AC_MSG_CHECKING([whether the compiler has the CPUID intrinsic])
+       AC_COMPILE_IFELSE(  [AC_LANG_PROGRAM([[#if HAVE_INTRIN_H
+                                                                               
        #include <intrin.h>
+                                                                               
   #endif
+                                                                               
 ]],
+                                                                               
 [[int CPUInfo[4];
+                                                                               
   __cpuid(CPUInfo, 1);
+                                                                               
 ]])],
+                                               [
+                                                 AC_MSG_RESULT([yes])
+                                                 
AC_DEFINE_UNQUOTED([XERCES_HAVE_CPUID_INTRINSIC], 1, [Define to have SSE2 
instruction support detected at runtime using __cpuid])
+                                               ],
+                                               [
+                                                 AC_MSG_RESULT([no])
+                                               ]
+                                        )
+
+       AC_MSG_CHECKING([whether the compiler has the _get_cpuid intrinsic])
+       AC_COMPILE_IFELSE(  [AC_LANG_PROGRAM([[#if HAVE_CPUID_H
+                                                                               
        #include <cpuid.h>
+                                                                               
   #endif
+                                                                               
 ]],
+                                                                               
 [[unsigned int eax, ebx, ecx, edx;
+                                           __get_cpuid (1, &eax, &ebx, &ecx, 
&edx);
+                                                                               
 ]])],
+                                               [
+                                                 AC_MSG_RESULT([yes])
+                                                 
AC_DEFINE_UNQUOTED([XERCES_HAVE_GETCPUID], 1, [Define to have SSE2 instruction 
support detected at runtime using __get_cpuid])
+                                               ],
+                                               [
+                                                 AC_MSG_RESULT([no])
+                                               ]
+                                        )
+
+       AC_MSG_CHECKING([whether the compiler has the SSE2 intrinsic])
+       AC_COMPILE_IFELSE(  [AC_LANG_PROGRAM([[#if HAVE_EMMINTRIN_H
+                                                                               
        #include <emmintrin.h>
+                                                                               
   #endif
+                                                                               
 ]],
+                                                                               
 [[__m128i* one=(__m128i*)_mm_malloc(4, 16);
+                                                                               
   __m128i* two=(__m128i*)_mm_malloc(4, 16);
+                                                                               
   __m128i xmm1 = _mm_load_si128(one);
+                                                                               
   __m128i xmm2 = _mm_load_si128(two);
+                                                                               
   __m128i xmm3 = _mm_or_si128(xmm1, xmm2);
+                                                                               
   _mm_store_si128(one, xmm3);
+                                                                               
   _mm_free(one);
+                                                                               
   _mm_free(two);
+                                                                               
 ]])],
+                                               [
+                                                 AC_MSG_RESULT([yes])
+                                                 
AC_DEFINE_UNQUOTED([XERCES_HAVE_SSE2_INTRINSIC], 1, [Define to have SSE2 
instruction used at runtime])
+                                               ],
+                                               [
+                                                 AC_MSG_RESULT([no])
+                                               ]
+                                        )
+
+fi
 
 AS_IF([test x$ac_cv_type_size_t = xyes],
        AC_DEFINE([XERCES_SIZE_T], [size_t], [Define as the appropriate size_t 
type]),

Modified: xerces/c/trunk/src/xercesc/util/PlatformUtils.cpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/PlatformUtils.cpp?rev=808072&r1=808071&r2=808072&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/PlatformUtils.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/PlatformUtils.cpp Wed Aug 26 16:01:20 2009
@@ -37,6 +37,9 @@
 #if HAVE_SYS_TIMEB_H
 #      include <sys/timeb.h>
 #endif
+#if HAVE_CPUID_H
+#   include <cpuid.h>
+#endif
 
 #include <xercesc/util/Mutexes.hpp>
 #include <xercesc/util/PlatformUtils.hpp>
@@ -50,6 +53,10 @@
 #include <xercesc/util/XMLInitializer.hpp>
 #include <xercesc/internal/MemoryManagerImpl.hpp>
 
+#if XERCES_HAVE_INTRIN_H
+#   include <intrin.h>
+#endif
+
 #include <xercesc/util/XMLFileMgr.hpp>
 #if XERCES_USE_FILEMGR_POSIX
 #      include <xercesc/util/FileManagers/PosixFileMgr.hpp>
@@ -147,6 +154,7 @@
 XMLMutex*               XMLPlatformUtils::fgAtomicMutex = 0;
 
 bool                    XMLPlatformUtils::fgXMLChBigEndian = true;
+bool                    XMLPlatformUtils::fgSSE2ok = false;
 
 // ---------------------------------------------------------------------------
 //  XMLPlatformUtils: Init/term methods
@@ -215,6 +223,26 @@
     endianTest.ch = 1;
     fgXMLChBigEndian = (endianTest.ar[sizeof(XMLCh)-1] == 1);
 
+    // Determine if we can use SSE2 functions. Both runtime probes query
+    // CPUID function 1 and test bit 26 of the EDX output for SSE2 support.
+#if defined(XERCES_HAVE_CPUID_INTRINSIC)
+    // __cpuid stores the EAX, EBX, ECX, EDX outputs in CPUInfo[0..3]
+    int CPUInfo[4]={0};
+    __cpuid(CPUInfo, 1);
+    fgSSE2ok = (CPUInfo[3] & (1UL << 26)) != 0;
+#elif defined(XERCES_HAVE_GETCPUID)
+    // SSE2 is usable only when the query succeeds (__get_cpuid returns
+    // non-zero) AND the SSE2 bit is set. Mind the polarity: a failed query
+    // or a clear bit must leave SSE2 disabled.
+    unsigned int eax, ebx, ecx, edx;
+    fgSSE2ok = __get_cpuid (1, &eax, &ebx, &ecx, &edx) != 0
+               && (edx & (1UL << 26)) != 0;
+#elif defined(XERCES_HAVE_SSE2_INTRINSIC)
+    // if we cannot find out at runtime, assume the define has it right
+    fgSSE2ok = true;
+#else
+    fgSSE2ok = false;
+#endif
 
     // Initialize the platform-specific mutex and file mgrs
     fgMutexMgr         = makeMutexMgr(fgMemoryManager);

Modified: xerces/c/trunk/src/xercesc/util/PlatformUtils.hpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/PlatformUtils.hpp?rev=808072&r1=808071&r2=808072&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/PlatformUtils.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/PlatformUtils.hpp Wed Aug 26 16:01:20 2009
@@ -132,7 +132,7 @@
     static XMLMutex*            fgAtomicMutex;
 
     static bool                 fgXMLChBigEndian;
-
+    static bool                 fgSSE2ok;
     //@}
 
 

Modified: xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.hpp.in
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.hpp.in?rev=808072&r1=808071&r2=808072&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.hpp.in (original)
+++ xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.hpp.in Wed Aug 26 
16:01:20 2009
@@ -51,6 +51,8 @@
 #undef XERCES_AUTOCONF
 #undef XERCES_HAVE_SYS_TYPES_H
 #undef XERCES_HAVE_INTTYPES_H
+#undef XERCES_HAVE_INTRIN_H
+#undef XERCES_HAVE_EMMINTRIN_H
 #undef XERCES_INCLUDE_WCHAR_H
 
 #undef XERCES_S16BIT_INT
@@ -72,6 +74,10 @@
 #undef XERCES_NO_NATIVE_BOOL
 #undef XERCES_LSTRSUPPORT
 
+#undef XERCES_HAVE_CPUID_INTRINSIC
+#undef XERCES_HAVE_SSE2_INTRINSIC
+#undef XERCES_HAVE_GETCPUID
+
 #undef XERCES_PLATFORM_EXPORT
 #undef XERCES_PLATFORM_IMPORT
 

Modified: xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.msvc.hpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.msvc.hpp?rev=808072&r1=808071&r2=808072&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.msvc.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/Xerces_autoconf_config.msvc.hpp Wed Aug 26 
16:01:20 2009
@@ -105,6 +105,11 @@
 
 #define XERCES_MFC_SUPPORT
 
+#define XERCES_HAVE_INTRIN_H 1
+#define XERCES_HAVE_EMMINTRIN_H 1
+#define XERCES_HAVE_CPUID_INTRINSIC
+#define XERCES_HAVE_SSE2_INTRINSIC
+
 // ---------------------------------------------------------------------------
 //  XMLSize_t is the unsigned integral type.
 // ---------------------------------------------------------------------------

Modified: xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp
URL: 
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp?rev=808072&r1=808071&r2=808072&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp (original)
+++ xerces/c/trunk/src/xercesc/validators/common/CMStateSet.hpp Wed Aug 26 
16:01:20 2009
@@ -37,12 +37,17 @@
 #include <xercesc/framework/MemoryManager.hpp>
 #include <string.h>
 
+#if XERCES_HAVE_EMMINTRIN_H
+#   include <emmintrin.h>
+#endif
+
 XERCES_CPP_NAMESPACE_BEGIN
 
 class CMStateSetEnumerator;
 
 #define CMSTATE_CACHED_INT32_SIZE  4
 
+// This value must be a multiple of 128 in order to use the SSE2 instruction 
set
 #define CMSTATE_BITFIELD_CHUNK  1024
 #define CMSTATE_BITFIELD_INT32_SIZE (1024 / 32)
 
@@ -119,7 +124,7 @@
             {
                 if(toCopy.fDynamicBuffer->fBitArray[index]!=NULL)
                 {
-                    
fDynamicBuffer->fBitArray[index]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
 * sizeof(XMLInt32));
+                    allocateChunk(index);
                     memcpy((void *) fDynamicBuffer->fBitArray[index],
                            (const void *) 
toCopy.fDynamicBuffer->fBitArray[index],
                            CMSTATE_BITFIELD_INT32_SIZE * sizeof(XMLInt32));
@@ -141,10 +146,8 @@
         if(fDynamicBuffer)
         {
             for(XMLSize_t index = 0; index < fDynamicBuffer->fArraySize; 
index++)
-            {
                 if(fDynamicBuffer->fBitArray[index]!=NULL)
-                    
fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer->fBitArray[index]);
-            }
+                    deallocateChunk(index);
             
fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer->fBitArray);
             fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer);
         }
@@ -158,7 +161,7 @@
     {
         if(fDynamicBuffer==0)
         {
-            for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE; 
index++)
+           for (XMLSize_t index = 0; index < CMSTATE_CACHED_INT32_SIZE; 
index++)
                 if(setToOr.fBits[index])
                     if(fBits[index])
                         fBits[index] |= setToOr.fBits[index];
@@ -168,27 +171,46 @@
         else
         {
             for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize; 
index++)
-                if(setToOr.fDynamicBuffer->fBitArray[index]!=NULL)
+            {
+                XMLInt32 *& other = setToOr.fDynamicBuffer->fBitArray[index];
+                if(other!=NULL)
                 {
                     // if we haven't allocated the subvector yet, allocate it 
and copy
                     if(fDynamicBuffer->fBitArray[index]==NULL)
                     {
-                        
fDynamicBuffer->fBitArray[index]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
 * sizeof(XMLInt32));
-                        memcpy((void *) fDynamicBuffer->fBitArray[index],
-                               (const void *) 
setToOr.fDynamicBuffer->fBitArray[index],
+                        allocateChunk(index);
+                        memcpy((void *) fDynamicBuffer->fBitArray[index], 
+                               (const void *) other, 
                                CMSTATE_BITFIELD_INT32_SIZE * sizeof(XMLInt32));
                     }
                     else
                     {
                         // otherwise, merge them
-                        for(XMLSize_t subIndex = 0; subIndex < 
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
-                            
if(setToOr.fDynamicBuffer->fBitArray[index][subIndex])
-                                if(fDynamicBuffer->fBitArray[index][subIndex])
-                                    fDynamicBuffer->fBitArray[index][subIndex] 
|= setToOr.fDynamicBuffer->fBitArray[index][subIndex];
-                                else
-                                    fDynamicBuffer->fBitArray[index][subIndex] 
= setToOr.fDynamicBuffer->fBitArray[index][subIndex];
+                        XMLInt32*& mine = fDynamicBuffer->fBitArray[index];
+#ifdef XERCES_HAVE_SSE2_INTRINSIC
+                        if(XMLPlatformUtils::fgSSE2ok)
+                        {
+                            for(XMLSize_t subIndex = 0; subIndex < 
CMSTATE_BITFIELD_INT32_SIZE; subIndex+=4)
+                            {
+                               __m128i xmm1 = 
_mm_load_si128((__m128i*)&other[subIndex]);
+                               __m128i xmm2 = 
_mm_load_si128((__m128i*)&mine[subIndex]);
+                               __m128i xmm3 = _mm_or_si128(xmm1, xmm2);     // 
 OR  4 32-bit words
+                               _mm_store_si128((__m128i*)&mine[subIndex], 
xmm3);
+                            }
+                        }
+                        else
+#endif
+                        {
+                            for(XMLSize_t subIndex = 0; subIndex < 
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
+                                
if(setToOr.fDynamicBuffer->fBitArray[index][subIndex])
+                                    
if(fDynamicBuffer->fBitArray[index][subIndex])
+                                        
fDynamicBuffer->fBitArray[index][subIndex] |= 
setToOr.fDynamicBuffer->fBitArray[index][subIndex];
+                                    else
+                                        
fDynamicBuffer->fBitArray[index][subIndex] = 
setToOr.fDynamicBuffer->fBitArray[index][subIndex];
+                        }
                     }
                 }
+            }
         }
     }
 
@@ -209,14 +231,16 @@
         {
             for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize; 
index++)
             {
-                if(fDynamicBuffer->fBitArray[index]==NULL && 
setToCompare.fDynamicBuffer->fBitArray[index]==NULL)
+                XMLInt32 *& other = 
setToCompare.fDynamicBuffer->fBitArray[index], 
+                         *& mine = fDynamicBuffer->fBitArray[index];
+                if(mine==NULL && other==NULL)
                     continue;
-                else if(fDynamicBuffer->fBitArray[index]==NULL || 
setToCompare.fDynamicBuffer->fBitArray[index]==NULL) // the other should have 
been empty too
+                else if(mine==NULL || other==NULL) // the other should have 
been empty too
                     return false;
                 else
                 {
                     for(XMLSize_t subIndex = 0; subIndex < 
CMSTATE_BITFIELD_INT32_SIZE; subIndex++)
-                        
if(fDynamicBuffer->fBitArray[index][subIndex]!=setToCompare.fDynamicBuffer->fBitArray[index][subIndex])
+                        if(mine[subIndex]!=other[subIndex])
                             return false;
                 }
             }
@@ -248,16 +272,13 @@
                 {
                     // delete this subentry
                     if(fDynamicBuffer->fBitArray[index]!=NULL)
-                    {
-                        
fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer->fBitArray[index]);
-                        fDynamicBuffer->fBitArray[index]=NULL;
-                    }
+                        deallocateChunk(index);
                 }
                 else
                 {
                     // if we haven't allocated the subvector yet, allocate it 
and copy
                     if(fDynamicBuffer->fBitArray[index]==NULL)
-                        
fDynamicBuffer->fBitArray[index]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
 * sizeof(XMLInt32));
+                        allocateChunk(index);
                     memcpy((void *) fDynamicBuffer->fBitArray[index],
                            (const void *) 
srcSet.fDynamicBuffer->fBitArray[index],
                            CMSTATE_BITFIELD_INT32_SIZE * sizeof(XMLInt32));
@@ -279,7 +300,7 @@
                 if (fBits[index] != 0)
                     for(int i=0;i<32;i++)
                     {
-                        XMLInt32 mask=(1UL << i);
+                        const XMLInt32 mask = 1UL << i;
                         if(fBits[index] & mask)
                             count++;
                     }
@@ -298,7 +319,7 @@
                     if (fDynamicBuffer->fBitArray[index][subIndex] != 0)
                         for(int i=0;i<32;i++)
                         {
-                            XMLInt32 mask=(1UL << i);
+                            const XMLInt32 mask = 1UL << i;
                             if(fDynamicBuffer->fBitArray[index][subIndex] & 
mask)
                                 count++;
                         }
@@ -319,7 +340,7 @@
         // And access the right bit and byte
         if(fDynamicBuffer==0)
         {
-            const XMLInt32 mask = (0x1UL << (bitToGet % 32));
+            const XMLInt32 mask = 1UL << (bitToGet % 32);
             const XMLSize_t byteOfs = bitToGet / 32;
             return (fBits[byteOfs]!=0 && (fBits[byteOfs] & mask) != 0);
         }
@@ -328,7 +349,7 @@
             const XMLSize_t vectorOfs = bitToGet / CMSTATE_BITFIELD_CHUNK;
             if(fDynamicBuffer->fBitArray[vectorOfs]==NULL)
                 return false;
-            const XMLInt32 mask = (0x1UL << (bitToGet % 32));
+            const XMLInt32 mask = 1UL << (bitToGet % 32);
             const XMLSize_t byteOfs = (bitToGet % CMSTATE_BITFIELD_CHUNK) / 32;
             return (fDynamicBuffer->fBitArray[vectorOfs][byteOfs]!=0 && 
(fDynamicBuffer->fBitArray[vectorOfs][byteOfs] & mask) != 0);
         }
@@ -368,7 +389,7 @@
             else
                 ThrowXML(ArrayIndexOutOfBoundsException, 
XMLExcepts::Bitset_BadIndex);
 
-        const XMLInt32 mask = (0x1UL << (bitToSet % 32));
+        const XMLInt32 mask = 1UL << (bitToSet % 32);
 
         // And access the right bit and byte
         if(fDynamicBuffer==0)
@@ -382,7 +403,7 @@
             const XMLSize_t vectorOfs = bitToSet / CMSTATE_BITFIELD_CHUNK;
             if(fDynamicBuffer->fBitArray[vectorOfs]==NULL)
             {
-                
fDynamicBuffer->fBitArray[vectorOfs]=(XMLInt32*)fDynamicBuffer->fMemoryManager->allocate(CMSTATE_BITFIELD_INT32_SIZE
 * sizeof(XMLInt32));
+                allocateChunk(vectorOfs);
                 for(XMLSize_t index=0;index < CMSTATE_BITFIELD_INT32_SIZE; 
index++)
                     fDynamicBuffer->fBitArray[vectorOfs][index]=0;
             }
@@ -404,10 +425,7 @@
             for (XMLSize_t index = 0; index < fDynamicBuffer->fArraySize; 
index++)
                 // delete this subentry
                 if(fDynamicBuffer->fBitArray[index]!=NULL)
-                {
-                    
fDynamicBuffer->fMemoryManager->deallocate(fDynamicBuffer->fBitArray[index]);
-                    fDynamicBuffer->fBitArray[index]=NULL;
-                }
+                    deallocateChunk(index);
         }
     }
 
@@ -441,6 +459,29 @@
     // -----------------------------------------------------------------------
     CMStateSet();
 
+    // -----------------------------------------------------------------------
+    // Helpers
+    // -----------------------------------------------------------------------
+    // Allocates the storage for the chunk at position 'index' of the dynamic
+    // bit array: CMSTATE_BITFIELD_INT32_SIZE 32-bit words. This helper does
+    // not clear the memory itself. When SSE2 is in use the buffer comes from
+    // _mm_malloc with 16-byte alignment; otherwise it comes from the memory
+    // manager.
+    void allocateChunk(const XMLSize_t index)
+    {
+        const XMLSize_t chunkBytes = CMSTATE_BITFIELD_INT32_SIZE * sizeof(XMLInt32);
+        void* chunk = 0;
+#ifdef XERCES_HAVE_SSE2_INTRINSIC
+        if(XMLPlatformUtils::fgSSE2ok)
+            chunk = _mm_malloc(chunkBytes, 16);
+        else
+#endif
+            chunk = fDynamicBuffer->fMemoryManager->allocate(chunkBytes);
+        fDynamicBuffer->fBitArray[index] = (XMLInt32*)chunk;
+    }
+
+    // Releases the chunk at position 'index' with the routine matching the
+    // one allocateChunk() uses (_mm_free when SSE2 is in use, the memory
+    // manager otherwise) and then resets the slot to NULL.
+    void deallocateChunk(const XMLSize_t index)
+    {
+        XMLInt32*& slot = fDynamicBuffer->fBitArray[index];
+#ifdef XERCES_HAVE_SSE2_INTRINSIC
+        if(XMLPlatformUtils::fgSSE2ok)
+            _mm_free(slot);
+        else
+#endif
+            fDynamicBuffer->fMemoryManager->deallocate(slot);
+        slot = NULL;
+    }
 
     // -----------------------------------------------------------------------
     //  Private data members
@@ -484,7 +525,7 @@
         {
             for(XMLSize_t i=0;i< (start - fIndexCount);i++)
             {
-                XMLInt32 mask=(1UL << i);
+                XMLInt32 mask=1UL << i;
                 if(fLastValue & mask)
                     fLastValue &= ~mask;
             }
@@ -503,7 +544,7 @@
     {
         for(int i=0;i<32;i++)
         {
-            XMLInt32 mask=(1UL << i);
+            XMLInt32 mask=1UL << i;
             if(fLastValue & mask)
             {
                 fLastValue &= ~mask;



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to