Hi, I think there is a concurrency issue with Spinlocks in linuxmodule multi-threaded click (running a 2.6.19.2 patched kernel, the e1000-NAPI driver and today's trunk). I've put together a temporary patch, but there might be further issues. Sorry, I don't have the time to investigate more.
Problems: The series of events are: -Thread A is running on CPU 0 and thread B is running on CPU 1. -A acquires the Spinlock and executes code in the critical section. -Linux schedules Thread A to run on CPU 1 and thread B to run on CPU 0. -B acquires the Spinlock (works because CPU 0 is the owner of the lock, not the thread) -A and B are both operating in the critical section -(if you're lucky) A releases the Spinlock and generates a releasing someone else's lock message -(if you're unlucky) Oops Attached is a test element and a configuration to elicit the behavior. I could only replicate the problem when using a polling input source under heavy load. After fixing the above problem I was still seeing two threads inside the CS. For some reason the atomic_uint32_t::swap was not working atomically as expected. Changing this to atomic_uint32::compare_and_swap solved the problem. We're running x86_64 quad core Xeon CPUs. Solution: In the patch attached I added a new function click_current_thread() which returns a thread_info pointer. Inside of Spinlock I replace click_current_processor() uses with click_current_thread(). This way even if the thread is assigned to another core it will still have the same thread_info pointer. The problem with this solution is that it does not work as an index into an array nicely and cannot simply replace all uses of click_current_processor(). There may be other places in the code which make the same incorrect use of click_current_processor(), but a better approach will be needed if the value is used to index into an array (such as in ReadWriteLock). Thanks! Kevin Springborn
--- click.orig/include/click/glue.hh
+++ click.new/include/click/glue.hh
@@ -336,6 +336,42 @@
#endif
}
+// THREAD IDENTITIES
+#if CLICK_LINUXMODULE
+typedef thread_info* click_thread_t;
+#elif CLICK_USERLEVEL && HAVE_MULTITHREAD
+typedef pthread_t click_thread_t;
+#else
+typedef int8_t click_thread_t;
+#endif
+
+inline click_thread_t
+click_current_thread()
+{
+#if CLICK_LINUXMODULE
+# if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 0)
+ return current_thread_info();
+# else
+ return current->processor;
+# endif
+#elif CLICK_USERLEVEL && HAVE_MULTITHREAD
+ return pthread_self();
+#else
+ return 0;
+#endif
+}
+
+inline click_thread_t
+click_invalid_thread()
+{
+#if CLICK_LINUXMODULE
+ return NULL;
+#elif CLICK_USERLEVEL && HAVE_MULTITHREAD
+ return 0;
+#else
+ return -1;
+#endif
+}
// TIMEVALS AND JIFFIES
// click_jiffies_t is the type of click_jiffies() and is usually unsigned.
--- click.orig/include/click/sync.hh
+++ click.new/include/click/sync.hh
@@ -64,7 +64,7 @@
private:
atomic_uint32_t _lock;
int32_t _depth;
- click_processor_t _owner;
+ click_thread_t _owner;
#endif
};
@@ -73,7 +73,7 @@
inline
Spinlock::Spinlock()
#if CLICK_MULTITHREAD_SPINLOCK
- : _depth(0), _owner(click_invalid_processor())
+ : _depth(0), _owner(click_invalid_thread())
#endif
{
#if CLICK_MULTITHREAD_SPINLOCK
@@ -101,11 +101,11 @@
Spinlock::acquire()
{
#if CLICK_MULTITHREAD_SPINLOCK
- if (_owner != click_current_processor()) {
- while (_lock.swap(1) != 0)
+ if (_owner != click_current_thread()) {
+ while (!_lock.compare_and_swap(0, 1))
while (_lock != 0)
asm volatile ("" : : : "memory");
- _owner = click_current_processor();
+ _owner = click_current_thread();
}
_depth++;
#endif
@@ -121,10 +121,10 @@
Spinlock::attempt()
{
#if CLICK_MULTITHREAD_SPINLOCK
- if (_owner != click_current_processor()) {
- if (_lock.swap(1) != 0)
+ if (_owner != click_current_thread()) {
+ if (!_lock.compare_and_swap(0, 1))
return false;
- _owner = click_current_processor();
+ _owner = click_current_thread();
}
_depth++;
return true;
@@ -142,11 +142,11 @@
Spinlock::release()
{
#if CLICK_MULTITHREAD_SPINLOCK
- if (unlikely(_owner != click_current_processor()))
+ if (unlikely(_owner != click_current_thread()))
click_chatter("releasing someone else's lock");
if (likely(_depth > 0)) {
if (--_depth == 0) {
- _owner = click_invalid_processor();
+ _owner = click_invalid_thread();
_lock = 0;
}
} else#include <click/config.h>
#include "test.hh"
#include <click/glue.hh>
CLICK_DECLS
void
TestElement::push(int, Packet * p_in)
{
click_spinlock.acquire();
for (int i = 0; i < 10000; i++)
asm volatile ("" : : : "memory");
count++;
click_spinlock.release();
checked_output_push(0, p_in);
}
CLICK_ENDDECLS
EXPORT_ELEMENT(TestElement)#ifndef TEST_ELEMENT_HH
#define TEST_ELEMENT_HH
#include <click/element.hh>
#include <click/sync.hh>
CLICK_DECLS
class TestElement : public Element {
public:
TestElement()
{
count = 0;
}
~TestElement(){click_chatter("Count %u\n", count);}
const char * class_name() const { return "TestElement";}
int configure(Vector<String> &conf, ErrorHandler *errh){return 1;}
const char *port_count() const { return "1/1"; }
void push(int x, Packet * p_in);
private:
uint count;
Spinlock click_spinlock;
};
CLICK_ENDDECLS
#endif
test.click
Description: Binary data
_______________________________________________ click mailing list [email protected] https://amsterdam.lcs.mit.edu/mailman/listinfo/click
