This check-in fixes a deadlock that we have been hitting:
- The first thread takes the port lock, increments endpt_rdr, and releases
the lock.
- The second thread then takes the lock and waits for endpt_rdr to become 0.
- The first thread tries to take the object lock and gets stuck; as a result
it can never decrement endpt_rdr.

The call stacks are included below, after the patch.

Index: ipoib_port.cpp
===================================================================
--- ipoib_port.cpp            (revision 3095)
+++ ipoib_port.cpp         (working copy)
@@ -7205,7 +7205,10 @@

                /* Wait for all readers to complete. */
                while( p_port->endpt_rdr )
-                              ;
+             {
+                             cl_obj_unlock( &p_port->obj );
+                             cl_obj_lock( &p_port->obj );
+             }
                /*
                 * We don't need to initiate destruction - this is called only
                 * from the __port_destroying function, and destruction cascades
@@ -7240,7 +7243,10 @@
                cl_obj_lock( &p_port->obj );
                /* Wait for all readers to complete. */
                while( p_port->endpt_rdr )
-                              ;
+             {
+                             cl_obj_unlock( &p_port->obj );
+                             cl_obj_lock( &p_port->obj );
+             }

 #if 0
                __endpt_mgr_remove_all(p_port);
@@ -7410,7 +7416,10 @@
                cl_obj_lock( &p_port->obj );
                /* Wait for all readers to complete. */
                while( p_port->endpt_rdr > 1 )
-                              ;
+             {
+                             cl_obj_unlock( &p_port->obj );
+                             cl_obj_lock( &p_port->obj );
+             }

                /* Remove the endpoint from the maps so further requests don't 
find it. */
                cl_qmap_remove_item( &p_port->endpt_mgr.mac_endpts, 
&p_endpt->mac_item );
@@ -7869,7 +7878,10 @@

                /* Wait for all readers to finish */
                while( p_port->endpt_rdr )
-                              ;
+             {
+                             cl_obj_unlock( &p_port->obj );
+                             cl_obj_lock( &p_port->obj );
+             }
                p_item = cl_qmap_remove( &p_port->endpt_mgr.mac_endpts, key );
                /*
                 * Dereference the endpoint.  If the ref count goes to zero, it
The stacks are:
ChildEBP RetAddr  Args to Child               
87ba4990 826b6362 0002625a 00000000 00034900 nt!KeAccumulateTicks+0x316 
87ba49d4 82617430 857c9002 000000d1 87ba4a60 nt!KeUpdateRunTime+0x145 
87ba49d4 826b3466 857c9002 000000d1 87ba4a60 hal!HalpClockInterruptPn+0x158 
87ba4a54 857b0497 87ba4a98 87ba4a6c 857b04e9 
nt!KefAcquireSpinLockAtDpcLevel+0x46 
87ba4a60 857b04e9 8aa5503c 87ba4aa4 857bda0e ipoib!cl_spinlock_acquire+0x1b 
[s:builds7198branchesmlnx_winof-2_1_3inckernelcomplibcl_spinlock_osd.h @ 96] 
87ba4a6c 857bda0e 8aa55000 a0a10888 a0a10888 ipoib!cl_obj_lock+0x11 
[s:builds7198branchesmlnx_winof-2_1_3inccomplibcl_obj.h @ 705] 
87ba4aa4 857bff40 a0a10888 87ba4ad0 87ba4adc ipoib!__recv_get_endpts+0x150 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_port.cpp @ 
2579] 
87ba4ae0 857c1067 8aa55000 87ba4e90 87ba4e80 ipoib!__recv_mgr_filter+0x1dc 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_port.cpp @ 
2728] 
87ba4eb0 857c1c86 8aa55000 87ba4ed0 00000001 ipoib!__recv_cb_internal+0xfb 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_port.cpp @ 
2272] 
87ba4ed8 85833403 8d8a9500 8aa55000 87ba4ef4 ipoib!__recv_cb+0x11c 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_port.cpp @ 
2434] 
87ba4ee8 85790368 8d8a9500 87ba4f0c 8579a034 ibbus!ci_ca_comp_cb+0x29 
[s:builds7198branchesmlnx_winof-2_1_3corealkernelal_ci_ca.c @ 365] 
87ba4ef4 8579a034 8d6bbf54 8a44a620 9cfdf240 mlx4_bus!mlx4_ib_cq_comp+0xe 
[s:builds7198branchesmlnx_winof-2_1_3hwmlx4kernelbusibcq.c @ 50] 
87ba4f0c 8579669c 8a447000 00000088 87b85a00 mlx4_bus!mlx4_cq_completion+0x62 
[s:builds7198branchesmlnx_winof-2_1_3hwmlx4kernelbusnetcq.c @ 79] 
87ba4f38 85796978 8a447000 8a44a648 87ba4fa4 mlx4_bus!mlx4_eq_int+0x80 
[s:builds7198branchesmlnx_winof-2_1_3hwmlx4kernelbusneteq.c @ 114] 
87ba4f48 826b33b5 8a44a648 8a44a620 00000000 mlx4_bus!mlx4_dpc_msix+0x10 
[s:builds7198branchesmlnx_winof-2_1_3hwmlx4kernelbusneteq.c @ 283] 
87ba4fa4 826b3218 87b84120 8d6b05c0 00000000 nt!KiExecuteAllDpcs+0xf9 
87ba4ff4 826b29dc 8b4ec9bc 00000000 00000000 nt!KiRetireDpcList+0xd5 
87ba4ff8 8b4ec9bc 00000000 00000000 00000000 nt!KiDispatchInterrupt+0x2c 
WARNING: Frame IP not in any known module. Following frames may be wrong. 
826b29dc 00000000 0000001a 00d6850f bb830000 0x8b4ec9bc 

87c44818 857b68b9 8aa55000 005e0001 0000fc00 ipoib!ipoib_port_remove_endpt+0x65 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_port.cpp @ 
7372] 
87c44848 857b4920 05536bf8 8d6bae38 00000005 ipoib!ipoib_refresh_mcast+0xdd 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_adapter.cpp @ 
973] 
87c4488c 857b49f3 8a536bf8 01edb378 00020000 ipoib!ipoib_set_info+0x500 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_driver.cpp @ 
3130] 
87c448a8 8547e83c 8a536bf8 8fedb378 8a4270e0 ipoib!ipoib_oid_handler+0x61 
[s:builds7198branchesmlnx_winof-2_1_3ulpipoib_ndis6_cmkernelipoib_driver.cpp @ 
3239] 
87c448f0 8542a123 004270e0 c0000001 8e2b7ba0 ndis!ndisMDoOidRequest+0x3a1 
87c44918 8542a7c5 00000000 8fedb378 8fedb3f8 ndis!ndisFQueueRequestOnNext+0x21e 
87c44980 904c8086 8e2b7ba0 8fedb378 826177a0 ndis!NdisFOidRequest+0x181 
87c449a0 8547e2ff 8d6dc568 8ada8288 00000000 pacer!PcFilterRequest+0x78 
87c44a00 8542a09c 002b7ba0 c0000001 8cf7f868 ndis!ndisFDoOidRequest+0x1a2 
87c44a28 8542a7c5 00000000 8ada8288 8d8d34b0 ndis!ndisFQueueRequestOnNext+0x197 
87c44a90 904bcb22 8cf7f868 8ada8288 826177a0 ndis!NdisFOidRequest+0x181 
87c44ab0 8547e2ff 8d8d34b0 87c44bd8 00000000 wfplwf!FilterOidRequest+0x50 
87c44b10 85429a17 00f7f868 8a4270e0 87c44bd8 ndis!ndisFDoOidRequest+0x1a2 
87c44b3c 8548f602 8a4270e0 87c44bd8 00000000 ndis!ndisQueueRequestOnTop+0x224 
87c44bb0 8548f686 87fb34b8 87c44bd8 87c44c7c ndis!ndisMOidRequest+0x1dd 
87c44bc0 856946b8 87fb34b8 87c44bd8 00000000 ndis!NdisOidRequest+0xf 
87c44c7c 85628989 87f8f320 00000001 01010103 
tcpip!FlpNdisRequestUnderReference+0x7d 
87c44cb8 85628b9e 01f8f320 87c44cec 8562873b tcpip!FlpMulticastListSet+0xa1 
87c44cc4 8562873b 87f8f320 8738e020 9e964fe0 
tcpip!FlpSerializedNdisDeleteGroupWorker+0xd 
87c44cec 8286b815 8ab46c68 87f8f320 84dd54c0 
tcpip!FlpSerializedNdisRequestWorkerRoutine+0x41 

Attachment: ipoib_deadlock.patch
Description: ipoib_deadlock.patch

_______________________________________________
ofw mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw

Reply via email to