commit 01b72f27721c2dccf42dd5eea2e5d5e573cd8585
Author: Olaf Kirch <[EMAIL PROTECTED]>
Date:   Wed May 7 10:40:36 2008 +0200

    Fix RDS congestion monitoring
    
    The RDS congestion monitoring code tries to help applications
    deal with remote congestion more efficiently. If enabled,
    an application that tried to send to a congested port will
    receive a notification as soon as the port becomes uncongested
    again. For efficiency reasons, the application isn't given a
    complete 8K congestion bitmap, but a 64bit mask that represents
    the ports having changed, with port N being represented by
    (1ULL << (port % 64))
    
    This code had several bugs in it:
     -  the macro used to translate port numbers to the mask bit
        shifted integer 1, not 1ULL, resulting in undefined
        behavior when (port % 64) >= 32
    
     -  rds_ib_cong_recv computes the 64bit mask of all ports
        that changed from congested to uncongested. It got the
        bit arithmetic wrong.
        Also, it used be64_to_cpu to convert the mask, which
        is wrong, as the congestion map is little endian
    
     -  in the IB send completion handler, we need to check
        whether there is a pending congestion map update we
        need to send.
    
     -  in rds_poll, we should grab the rs_lock spinlock
        when testing whether rs_cong_notify is non-zero
    
    Signed-off-by: Olaf Kirch <[EMAIL PROTECTED]>

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 7c20dd4..2fa2d0e 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -165,8 +165,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
                if (rds_cong_updated_since(&rs->rs_cong_track))
                        mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
        } else {
+               spin_lock(&rs->rs_lock);
                if (rs->rs_cong_notify)
                        mask |= (POLLIN | POLLRDNORM);
+               spin_unlock(&rs->rs_lock);
        }
        if (!list_empty(&rs->rs_recv_queue)
         || !list_empty(&rs->rs_notify_queue))
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 4ec85ce..beeb539 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -238,7 +238,7 @@ void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
        if (waitqueue_active(&rds_poll_waitq))
                wake_up_all(&rds_poll_waitq);
 
-       if (!list_empty(&rds_cong_monitor)) {
+       if (portmask && !list_empty(&rds_cong_monitor)) {
                unsigned long flags;
                struct rds_sock *rs;
 
diff --git a/net/rds/ib_rds.h b/net/rds/ib_rds.h
index cea73fc..e098036 100644
--- a/net/rds/ib_rds.h
+++ b/net/rds/ib_rds.h
@@ -176,7 +176,7 @@ struct rds_info_tcp_socket {
  */
 #define RDS_CONG_MONITOR_SIZE  64
 #define RDS_CONG_MONITOR_BIT(port)  (((unsigned int) port) % RDS_CONG_MONITOR_SIZE)
-#define RDS_CONG_MONITOR_MASK(port) (1 << RDS_CONG_MONITOR_BIT(port))
+#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))
 
 /*
  * RDMA related types
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 6c9cc9e..8ffbb0c 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -639,8 +639,9 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
                src = addr + frag_off;
                dst = (void *)map->m_page_addrs[map_page] + map_off;
                for (k = 0; k < to_copy; k += 8) {
-                       /* Record ports that became uncongested */
-                       uncongested |= *src & (*src ^ *dst);
+                       /* Record ports that became uncongested, ie
+                        * bits that changed from 1 to 0. */
+                       uncongested |= ~(*src) & *dst;
                        *dst++ = *src++;
                }
                kunmap_atomic(addr, KM_SOFTIRQ0);
@@ -662,7 +663,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
        }
 
        /* the congestion map is in little endian order */
-       uncongested = be64_to_cpu(uncongested);
+       uncongested = le64_to_cpu(uncongested);
 
        rds_cong_map_updated(map, uncongested);
 }
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 724167c..567f62f 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -218,7 +218,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 
                rds_ib_ring_free(&ic->i_send_ring, completed);
 
-               if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
+               if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
+                || test_bit(0, &conn->c_map_queued))
                        queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 
                /* We expect errors as the qp is drained during shutdown */

-- 
Olaf Kirch  |  --- o --- Nous sommes du soleil we love when we play
[EMAIL PROTECTED] |    / | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to