Thanks a lot, David, for the help and advice.

In fact, in the test program that has been developed to isolate the issue,
there is one thread that is updating a Xerces DOM Document by doing a set
on some nodes. The other thread is building an XML document using just
XalanDOMString and leveraging all the operators of this String class ("+="
for example)

This is the pstack output of the issue (see below).
The other thread is really inside the XalanDOMString code in the append
method.

Concerning our real program, we have taken care to protect all
multithreaded access to the Xerces DOM Document with mutexes, but in fact we
never thought that XalanDOMString might not be thread-safe under
Solaris, and it could be difficult now to add mutexes wherever we use this
class. Thus I really would like to find a solution at the source :-)

I will try a newer version of gcc to see whether it solves the issue.

I have seen some modifications in Xalan 1.4 in ArenaBlock.hpp (destructor)
or XalanDOMString. Also, the new class XalanDOMStringAllocator has been
created in Xalan 1.4. Is there any chance that the XalanDOMString in Xalan 1.3
could corrupt the memory and write or delete invalid memory for this thread
in a way that could have an impact on the other thread?

Regards,

Richard Marsot

# pstack core
core 'core' of 27169:   omu_unittest
-----------------  lwp# 1 / thread# 1  --------------------
 fe8339b0 __cl__Ct4less1ZP8NodeImplRCP8NodeImplT1 (1536e4, ffbee358,
730074, 8d780, 1, 0) + 14
 fe8467e4
insert_unique__t8_Rb_tree5ZP8NodeImplZt4pair2ZCP8NodeImplZP9XalanNodeZt10_Select1st1Zt4pair2ZCP8NodeImplZP9XalanNodeZt4less1ZP8NodeImplZt9allocator1ZP9XalanNodeRCt4pair2ZCP8NodeImplZP9XalanNode
 (1536dc, ffbee358, ffbee358, 8d780,0, 0) + 8c
 fe846d10
insert__t3map4ZP8NodeImplZP9XalanNodeZt4less1ZP8NodeImplZt9allocator1ZP9XalanNodeRCt4pair2ZCP8NodeImplZP9XalanNode
 (1536dc, ffbee358, ffbee3cc, 0, 0,0) + 1c
 fe437a28 addAssociation__20XercesToXalanNodeMapRC8DOM_NodeP9XalanNode
(1536d0,ffbee4f0, 8d780, 72cd0, 0, ff00) + 4c
 fe424884 createBridgeNode__C20XercesDocumentBridgeRC11DOM_ElementUlb
(1536c0, ffbee4f0, 0, 1, 1, 0) + a4
 fe425558 createBridgeNode__C20XercesDocumentBridgeRC8DOM_NodeUlb (1536c0,
ffbee4f0, 0, 1, 81010100, ff00) + 294
 fe423b90 mapNode__C20XercesDocumentBridgeP8NodeImpl (1536c0, 77f80, 0,
50160, 0, ffbee550) + 24c
 fe424238 mapNode__C20XercesDocumentBridgeRC11DOM_Element (1536c0,
ffbee5e0, ffffffff, 0, fe435474, 0) + 20
 fe428d08 getDocumentElement__C20XercesDocumentBridge (1536c0, fe428cc4,
ffffffff, 0, ffbee252, 239fa) + 44
 ff2f50dc omuSetProperty__FG12DOM_DocumentPCcT1 (ffbee7c8, 236d8, ffbee7d0,
23800, 6c6a, fea489e0) + 148
 0001904c omwSetProperty__Fv (346b0, 0, 19498, 0, 10002, 80000000) + c4
 00017dfc main     (1, ffbee9dc, ffbee9e4, 37540, 0, 0) + 158
 00017b6c _start   (0, 0, 0, 0, 0, 0) + 5c
-----------------  lwp# 2 / thread# 2  --------------------
 ff21ba1c _signotifywait (ff37e000, 59, 0, 0, ffbee35c, 4) + 8
 ff36206c thr_yield (0, 0, 0, 0, 0, 0) + 8c
-----------------  lwp# 3  --------------------------------
 ff2195c0 _door_return (3, ff37f690, ff37f6a8, 3, ff37e000, 1) + 10
 ff35a770 _lwp_start (fdcf5d70, 0, 6000, ffbee334, 0, 0) + 18
 ff36206c thr_yield (0, 0, 0, 0, 0, 0) + 8c
-----------------  lwp# 4 / thread# 4  --------------------
 00022684 invariants__C14XalanDOMString (ff3326b0, 0, 3cd68, 3cd6c,
81010100, ff0000) + ec
 0002277c length__C14XalanDOMString (ff3326b0, 3cd68, 2, 1, 81010100, ff00)
+ c
 ff3122a4 append__14XalanDOMStringRC14XalanDOMString (fda0bb20,
ff3326b0, ffffffff, ff1c1acc, 0, 0) + 20
 ff30ee78 __apl__14XalanDOMStringRC14XalanDOMString (fda0bb20, ff3326b0,
ff332400, ff315000, 0, fda0b939) + 14
 ff2f3bb8 omuNotifyBuild__FPCcT0PC14XalanDOMStringN22P14XalanDOMStringT2N52
(23a48, 23700, 118180, 37410, 374a0, fda0bb20) + 140
 00019234 omwNotify__Fv (346b0, 0, ff3a192c, ff3e6694, 0, 1) + 174
 000194c0 __notifyThread__FPv (0, 19498, fdf7df68, fde4808c, 0, 0) + 28
 fde481c8 invoke_i__18ACE_Thread_Adapter (46788, fdf7df68, fde4816c, 0, 0,
0) +
5c
 fde48124 invoke__18ACE_Thread_Adapter (46788, fdf7df68, fde480c0, 0, 0, 0)
+ 64
 fde16718 ace_thread_adapter (46788, fdce3d10, 1, ff38ad94, 0, 2) + 10
 ff36b730 _thread_start (46788, 0, 0, 0, 0, 0) + 40
--------------------------  thread# 3  --------------------
 ff35ddf8 _reap_wait (ff3829e8, 204e4, 0, ff37e000, 0, 0) + 38
 ff35db50 _reaper  (ff37ee38, ff384748, ff3829e8, ff37ee10, 1, fe400000) +
38
 ff36b730 _thread_start (0, 0, 0, 0, 0, 0) + 40




                                                                                
                                                       
                      David N                                                   
                                                       
                      Bertoni/Cambridge        To:       
<[email protected]>                                                
                      /IBM                     cc:                              
                                                       
                      <david_n_bertoni@        Subject:  Re: STL Multithreading 
issue under Sparc Solaris (gcc 2.95.3 - Xalan 1.3)     
                      us.ibm.com>                                               
                                                       
                                                                                
                                                       
                      01/07/2003 11:45                                          
                                                       
                      AM                                                        
                                                       
                                                                                
                                                       
                                                                                
                                                       








Hi Richard,

This could be a library issue with that version of the GCC compiler.  You
could try the STLport library, but I don't have any experience using it on
Solaris with GCC, so I don't know if that will fix the problem or not.  The
crash indicates you're in a routine where nodes for a std::map are being
allocated, and this is a common place for thread-safety problems in old
versions of the STL that were based on the SGI version.

It could also be a problem with the way you're using Xalan classes.  I can
see you're using the bridge from the Xerces DOM to Xalan.  If that's the
case, you must be _very_ careful if you're trying to share DOM instances
amongst threads.  See the documentation for the class XercesParserLiaison
for more details.  You'll see there are some accessor functions which
control how the bridge is built, and whether or not it's thread-safe.

If you're not sharing DOM instances between threads, then you can ignore
the previous paragraph, and try the STLport.  You could also try a later
version of GCC as the libraries have improved greatly in the 3.x versions.

Dave




                      "Richard MARSOT"
                      <Richard_MARSOT@         To:
                      [email protected]
                      nmss.com>                cc:      (bcc: David N
                      Bertoni/Cambridge/IBM)
                                               Subject: STL Multithreading
                      issue under Sparc Solaris (gcc 2.95.3 - Xalan 1.3)
                      01/07/2003 05:42
                      AM




Hi,

I am using Xalan C 1.3 under Sparc Solaris with gcc 2.95.3.
In my program, I use the XalanDOMString class intensively.

It appears that when I use two threads that both use XalanDOMString
instances, my program core dumps after a while inside the STL library.
If I put a mutex between the two threads, everything works fine.

I am struggling to understand if:
1/ this is a known issue in Xalan 1.3 that could have been corrected in 1.4
2/ or is it a known STL issue (not multithread safe under Solaris)

Should I use STLport to solve this issue? If you can give me some advice
to fix this issue, I would really appreciate it.

Thanks a lot for your help.

Richard Marsot.

see below the core dump output

#0  0xfe8339b0 in less<NodeImpl *>::operator() (this=0x1536e4,    __x
[EMAIL PROTECTED], [EMAIL PROTECTED])
    at
/usr/local/lib/gcc-lib/sparc-sun-solaris2.8/2.95.3
/../../../../include/g++-3/stl_function.h:111
111       bool operator()(const _Tp& __x, const _Tp& __y) const { return
__x < __y; }
(gdb) bt
#0  0xfe8339b0 in less<NodeImpl *>::operator() (this=0x1536e4,    __x
[EMAIL PROTECTED], [EMAIL PROTECTED])
    at
/usr/local/lib/gcc-lib/sparc-sun-solaris2.8/2.95.3
/../../../../include/g++-3/stl_function.h:111
#1  0xfe8467ec in _Rb_tree<NodeImpl *, pair<NodeImpl *const, XalanNode *>,
_Select1st<pair<NodeImpl *const, XalanNode *> >, less<NodeImpl *>,
allocator<XalanNode *> >::insert_unique (this=0x1536dc, [EMAIL PROTECTED])
    at
/usr/local/lib/gcc-lib/sparc-sun-solaris2.8/2.95.3
/../../../../include/g++-3/stl_tree.h:884
#2  0xfe846d1c in map<NodeImpl *, XalanNode *, less<NodeImpl *>,
allocator<XalanNode *> >::insert (this=0x1536dc, [EMAIL PROTECTED])
    at
/usr/local/lib/gcc-lib/sparc-sun-solaris2.8/2.95.3
/../../../../include/g++-3/stl_map.h:162
#3  0xfe437a34 in XercesToXalanNodeMap::addAssociation (this=0x1536d0,
[EMAIL PROTECTED], theXalanNode=0x8d780)
    at XercesParserLiaison/XercesToXalanNodeMap.cpp:97
#4  0xfe42488c in XercesDocumentBridge::createBridgeNode (this=0x1536c0,
[EMAIL PROTECTED], theIndex=0, mapNode=true)
    at XercesParserLiaison/XercesDocumentBridge.cpp:408
#5  0xfe425560 in XercesDocumentBridge::createBridgeNode (this=0x1536c0,
[EMAIL PROTECTED], theIndex=0, mapNode=true)
    at XercesParserLiaison/XercesDocumentBridge.cpp:807
#6  0xfe423b98 in XercesDocumentBridge::mapNode (this=0x1536c0,
theXercesNodeImpl=0x77f80)
    at XercesParserLiaison/XercesDocumentBridge.cpp:187
#7  0xfe424240 in XercesDocumentBridge::mapNode (this=0x1536c0,
[EMAIL PROTECTED])
    at XercesParserLiaison/XercesDocumentBridge.cpp:271
#8  0xfe428d10 in XercesDocumentBridge::getDocumentElement (this=0x1536c0)
    at XercesParserLiaison/XercesDocumentBridge.cpp:1442
#9  0xff2f50e4 in omuSetProperty (doc={<DOM_Node> = {        fImpl =
0x50928}, <No data fields>},
    szQuery=0x236d8 "/Properties/Propagation/NumActiveAlarms",
    szValue=0xffbee7d0 "1") at omu.cpp:2453
#10 0x19054 in omwSetProperty () at omu_unittest.cpp:195
#11 0x17e04 in main (argc=1, argv=0xffbee9dc) at omu_unittest.cpp:91








Reply via email to