[PATCH] use wait queue spinlock for the socket spinlock

Benjamin LaHaise Tue, 07 Mar 2006 15:12:40 -0800

Hi Dave et al,

The patch below merges the wait queue lock and the socket spinlock into a
single lock.  This gains us ~100-150Mbit/s on netperf, mostly because we know
how the spinlock is used: it only needs bh protection, so we can avoid the
whole irq save, disable and reenable sequence.  As a bonus, this also removes
one atomic operation per wakeup.


[These numbers are with the x86-64 task switching tweaks applied; without them the difference is lost in the noise.]
Before:
 87380  16384  16384    10.01      9501.56   90.66    90.66    1.563   1.563 
 87380  16384  16384    10.01      9476.08   93.40    93.40    1.615   1.615 
 87380  16384  16384    10.00      9473.65   80.66    80.66    1.395   1.395 
 87380  16384  16384    10.00      9525.82   80.41    80.41    1.383   1.383 
 87380  16384  16384    10.00      9523.49   80.71    80.71    1.388   1.388 
 87380  16384  16384    10.00      9430.09   80.01    80.01    1.390   1.390 
 87380  16384  16384    10.00      9469.60   80.71    80.71    1.396   1.396 
 87380  16384  16384    10.01      9517.88   79.32    79.32    1.365   1.365 
 87380  16384  16384    10.01      9512.30   80.31    80.31    1.383   1.383 
 87380  16384  16384    10.00      9453.69   80.90    80.90    1.402   1.402 

After:
 87380  16384  16384    10.01      9629.42   92.01    92.01    1.565   1.565 
 87380  16384  16384    10.01      9641.69   90.16    90.16    1.532   1.532 
 87380  16384  16384    10.01      9650.40   90.16    90.16    1.531   1.531 
 87380  16384  16384    10.00      9638.69   90.60    90.60    1.540   1.540 
 87380  16384  16384    10.01      9667.15   89.36    89.36    1.514   1.514 
 87380  16384  16384    10.01      9684.13   89.86    89.86    1.520   1.520 
 87380  16384  16384    10.01      9642.38   90.31    90.31    1.534   1.534 
 87380  16384  16384    10.00      9669.24   90.90    90.90    1.540   1.540 
 87380  16384  16384    10.00      9676.82   90.25    90.25    1.528   1.528 
 87380  16384  16384    10.00      9711.26   90.80    90.80    1.532   1.532 

                -ben

Signed-off-by: Benjamin LaHaise <[EMAIL PROTECTED]>
diff --git a/include/net/sock.h b/include/net/sock.h
index 57e5f6b..a864d32 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -76,13 +76,12 @@
  */
 struct sock_iocb;
 typedef struct {
-       spinlock_t              slock;
        struct sock_iocb        *owner;
        wait_queue_head_t       wq;
 } socket_lock_t;
 
 #define sock_lock_init(__sk) \
-do {   spin_lock_init(&((__sk)->sk_lock.slock)); \
+do { \
        (__sk)->sk_lock.owner = NULL; \
        init_waitqueue_head(&((__sk)->sk_lock.wq)); \
 } while(0)
@@ -733,8 +732,8 @@ extern void FASTCALL(lock_sock(struct so
 extern void FASTCALL(release_sock(struct sock *sk));
 
 /* BH context may only use the following locking interface. */
-#define bh_lock_sock(__sk)     spin_lock(&((__sk)->sk_lock.slock))
-#define bh_unlock_sock(__sk)   spin_unlock(&((__sk)->sk_lock.slock))
+#define bh_lock_sock(__sk)     spin_lock(&((__sk)->sk_lock.wq.lock))
+#define bh_unlock_sock(__sk)   spin_unlock(&((__sk)->sk_lock.wq.lock))
 
 extern struct sock             *sk_alloc(int family,
                                          gfp_t priority,
diff --git a/net/core/filter.c b/net/core/filter.c
index 93fbd01..69e4636 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -421,10 +421,10 @@ int sk_attach_filter(struct sock_fprog *
        if (!err) {
                struct sk_filter *old_fp;
 
-               spin_lock_bh(&sk->sk_lock.slock);
+               spin_lock_bh(&sk->sk_lock.wq.lock);
                old_fp = sk->sk_filter;
                sk->sk_filter = fp;
-               spin_unlock_bh(&sk->sk_lock.slock);
+               spin_unlock_bh(&sk->sk_lock.wq.lock);
                fp = old_fp;
        }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index f152783..96008af 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -445,15 +445,15 @@ set_rcvbuf:
                        break;
 
                case SO_DETACH_FILTER:
-                       spin_lock_bh(&sk->sk_lock.slock);
+                       spin_lock_bh(&sk->sk_lock.wq.lock);
                        filter = sk->sk_filter;
                         if (filter) {
                                sk->sk_filter = NULL;
-                               spin_unlock_bh(&sk->sk_lock.slock);
+                               spin_unlock_bh(&sk->sk_lock.wq.lock);
                                sk_filter_release(sk, filter);
                                break;
                        }
-                       spin_unlock_bh(&sk->sk_lock.slock);
+                       spin_unlock_bh(&sk->sk_lock.wq.lock);
                        ret = -ENONET;
                        break;
 
@@ -1031,20 +1031,25 @@ struct sk_buff *sock_alloc_send_skb(stru
        return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
 }
 
-static void __lock_sock(struct sock *sk)
+/* We use noinline here as this is the slow path and allowing gcc to inline
+ * results in much poorer code for lock_sock.
+ */
+static void noinline __lock_sock(struct sock *sk)
 {
        DEFINE_WAIT(wait);
 
        for(;;) {
-               prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-               spin_unlock_bh(&sk->sk_lock.slock);
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               add_wait_queue_exclusive_locked(&sk->sk_lock.wq, &wait);
+               spin_unlock_bh(&sk->sk_lock.wq.lock);
                schedule();
-               spin_lock_bh(&sk->sk_lock.slock);
+               spin_lock_bh(&sk->sk_lock.wq.lock);
                if(!sock_owned_by_user(sk))
                        break;
        }
-       finish_wait(&sk->sk_lock.wq, &wait);
+       __set_current_state(TASK_RUNNING);
+       if (!list_empty_careful(&wait.task_list))
+               list_del_init(&wait.task_list);
 }
 
 static void __release_sock(struct sock *sk)
@@ -1331,24 +1336,28 @@ void sock_init_data(struct socket *sock,
 void fastcall lock_sock(struct sock *sk)
 {
        might_sleep();
-       spin_lock_bh(&(sk->sk_lock.slock));
+       spin_lock_bh(&(sk->sk_lock.wq.lock));
        if (sk->sk_lock.owner)
                __lock_sock(sk);
        sk->sk_lock.owner = (void *)1;
-       spin_unlock_bh(&(sk->sk_lock.slock));
+       spin_unlock_bh(&(sk->sk_lock.wq.lock));
 }
 
 EXPORT_SYMBOL(lock_sock);
 
 void fastcall release_sock(struct sock *sk)
 {
-       spin_lock_bh(&(sk->sk_lock.slock));
+       if (!sk->sk_backlog.tail && !waitqueue_active(&sk->sk_lock.wq)) {
+               sk->sk_lock.owner = NULL;
+               return;
+       }
+       spin_lock_bh(&(sk->sk_lock.wq.lock));
        if (sk->sk_backlog.tail)
                __release_sock(sk);
        sk->sk_lock.owner = NULL;
         if (waitqueue_active(&(sk->sk_lock.wq)))
-               wake_up(&(sk->sk_lock.wq));
-       spin_unlock_bh(&(sk->sk_lock.slock));
+               wake_up_locked(&(sk->sk_lock.wq));
+       spin_unlock_bh(&(sk->sk_lock.wq.lock));
 }
 EXPORT_SYMBOL(release_sock);
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e7bbff4..d28c277 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -237,7 +237,7 @@ static __inline__ int icmp_xmit_lock(voi
 {
        local_bh_disable();
 
-       if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
+       if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.wq.lock))) {
                /* This can happen if the output path signals a
                 * dst_link_failure() for an outgoing ICMP packet.
                 */
@@ -249,7 +249,7 @@ static __inline__ int icmp_xmit_lock(voi
 
 static void icmp_xmit_unlock(void)
 {
-       spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
+       spin_unlock_bh(&icmp_socket->sk->sk_lock.wq.lock);
 }
 
 /*
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 21eb725..fabd268 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -91,7 +91,7 @@ static __inline__ int icmpv6_xmit_lock(v
 {
        local_bh_disable();
 
-       if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock))) {
+       if (unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.wq.lock))) {
                /* This can happen if the output path (f.e. SIT or
                 * ip6ip6 tunnel) signals dst_link_failure() for an
                 * outgoing ICMP6 packet.
@@ -104,7 +104,7 @@ static __inline__ int icmpv6_xmit_lock(v
 
 static __inline__ void icmpv6_xmit_unlock(void)
 {
-       spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock);
+       spin_unlock_bh(&icmpv6_socket->sk->sk_lock.wq.lock);
 }
 
 /* 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0ea947e..95beba0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5602,12 +5602,12 @@ static void sctp_sock_migrate(struct soc
         */
        newsp->type = type;
 
-       spin_lock_bh(&oldsk->sk_lock.slock);
+       spin_lock_bh(&oldsk->sk_lock.wq.lock);
        /* Migrate the backlog from oldsk to newsk. */
        sctp_backlog_migrate(assoc, oldsk, newsk);
        /* Migrate the association to the new socket. */
        sctp_assoc_migrate(assoc, newsk);
-       spin_unlock_bh(&oldsk->sk_lock.slock);
+       spin_unlock_bh(&oldsk->sk_lock.wq.lock);
 
        /* If the association on the newsk is already closed before accept()
         * is called, set RCV_SHUTDOWN flag.

----- End forwarded message -----
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] use wait queue spinlock for the socket spinlock

Reply via email to