From 24000a7c11fedb519aab11807703d91ae49ac421 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <[EMAIL PROTECTED]>
Date: Thu, 24 Apr 2008 00:27:36 -0700
Subject: [PATCH] RDS: Implement rds ping

Several people have asked for a way to test reachability of
remote nodes via RDS. This is it - rds ping.

RDS ping is implemented by sending packets to port 0.
For simplicity, we do not handle packet payloads at this time -
the ping response is always an empty packet.

Signed-off-by: Olaf Kirch <[EMAIL PROTECTED]>
---
 net/rds/cong.c   |    2 +-
 net/rds/rds.h    |    5 ++++
 net/rds/recv.c   |    6 +++++
 net/rds/send.c   |   56 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/rds/stats.c  |    2 +
 net/rds/sysctl.c |   10 +++++++++
 6 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/net/rds/cong.c b/net/rds/cong.c
index 2db2362..4ec85ce 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -348,7 +348,7 @@ int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rd
        if (!rds_cong_test_bit(map, port))
               return 0;
        if (nonblock) {
-               if (rs->rs_cong_monitor) {
+               if (rs && rs->rs_cong_monitor) {
                        unsigned long flags;
 
                        /* It would have been nice to have an atomic set_bit on
diff --git a/net/rds/rds.h b/net/rds/rds.h
index d5a966d..a0fb20c 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -487,6 +487,7 @@ struct rds_statistics {
        unsigned long   s_recv_delayed_retry;
        unsigned long   s_recv_ack_required;
        unsigned long   s_recv_rdma_bytes;
+       unsigned long   s_recv_ping;
        unsigned long   s_send_queue_empty;
        unsigned long   s_send_queue_full;
        unsigned long   s_send_sem_contention;
@@ -497,6 +498,7 @@ struct rds_statistics {
        unsigned long   s_send_ack_required;
        unsigned long   s_send_rdma;
        unsigned long   s_send_rdma_bytes;
+       unsigned long   s_send_pong;
        unsigned long   s_page_remainder_hit;
        unsigned long   s_page_remainder_miss;
        unsigned long   s_cong_update_queued;
@@ -570,6 +572,7 @@ rds_conn_up(struct rds_connection *conn)
 }
 
 /* message.c */
+struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
 struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
                                               size_t total_len);
 void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
@@ -641,6 +644,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
                         is_acked_func is_acked);
 int rds_send_acked_before(struct rds_connection *conn, u64 seq);
 void rds_send_remove_from_sock(struct list_head *messages, int status);
+int rds_send_pong(struct rds_connection *conn, __be16 dport);
 
 /* rdma.c */
 void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
@@ -672,6 +676,7 @@ extern unsigned long rds_sysctl_reconnect_min_jiffies;
 extern unsigned long rds_sysctl_reconnect_max_jiffies;
 extern unsigned int  rds_sysctl_max_unacked_packets;
 extern unsigned int  rds_sysctl_max_unacked_bytes;
+extern unsigned int  rds_sysctl_ping_enable;
 
 /* threads.c */
 int __init rds_threads_init(void);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 9adb24d..da3c879 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -196,6 +196,12 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
        }
        conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
 
+       if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
+               rds_stats_inc(s_recv_ping);
+               rds_send_pong(conn, inc->i_hdr.h_sport);
+               goto out;
+       }
+
        rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
        if (rs == NULL) {
                rds_stats_inc(s_recv_drop_no_sock);
diff --git a/net/rds/send.c b/net/rds/send.c
index a2a5b2a..26e1e3e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -700,8 +700,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 
        if (msg->msg_namelen) {
                /* XXX fail non-unicast destination IPs? */
-               if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET ||
-                   usin->sin_port == 0) {
+               if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) {
                        ret = -EINVAL;
                        goto out;
                }
@@ -820,3 +819,56 @@ out:
                rds_message_put(rm);
        return ret;
 }
+
+/*
+ * Reply to a ping packet: queue an empty message from port 0 to dport on conn.
+ * Returns 0 once queued, -ENOMEM or the rds_cong_wait() error otherwise. */
+int
+rds_send_pong(struct rds_connection *conn, __be16 dport)
+{
+       struct rds_message *rm;
+       unsigned long flags;
+       int ret = 0;
+
+       rm = rds_message_alloc(0, GFP_ATOMIC); /* nents = 0: no payload */
+       if (rm == NULL) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       rm->m_daddr = conn->c_faddr; /* address the reply to the connection's peer */
+
+       /* If the connection is down, trigger a connect. We may
+        * have scheduled a delayed reconnect however - in this case
+        * we should not interfere.
+        */
+       if (rds_conn_state(conn) == RDS_CONN_DOWN
+        && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
+               queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
+
+       ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); /* nonblock = 1, no rds_sock */
+       if (ret)
+               goto out; /* non-zero result: drop the pong and report the error */
+
+       spin_lock_irqsave(&conn->c_lock, flags); /* held across enqueue + seq bump */
+       list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
+       set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
+       rds_message_addref(rm); /* presumably the send queue's reference - confirm */
+       rm->m_inc.i_conn = conn;
+
+       rds_message_populate_header(&rm->m_inc.i_hdr, 0, dport,
+                                   conn->c_next_tx_seq); /* sport = 0 */
+       conn->c_next_tx_seq++;
+       spin_unlock_irqrestore(&conn->c_lock, flags);
+
+       rds_stats_inc(s_send_pong);
+
+       queue_delayed_work(rds_wq, &conn->c_send_w, 0); /* schedule c_send_w with zero delay */
+       rds_message_put(rm); /* balances one reference; the addref above remains */
+       return 0;
+
+out:
+       if (rm) /* rm is NULL when the allocation itself failed */
+               rds_message_put(rm);
+       return ret;
+}
diff --git a/net/rds/stats.c b/net/rds/stats.c
index abf7103..0bd91fa 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -53,6 +53,7 @@ static char *rds_stat_names[] = {
        "recv_delayed_retry",
        "recv_ack_required",
        "recv_rdma_bytes",
+       "recv_ping",
        "send_queue_empty",
        "send_queue_full",
        "send_sem_contention",
@@ -63,6 +64,7 @@ static char *rds_stat_names[] = {
        "send_ack_required",
        "send_rdma",
        "send_rdma_bytes",
+       "send_pong",
        "page_remainder_hit",
        "page_remainder_miss",
        "cong_update_queued",
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 5f7ce37..7b18c0a 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -47,6 +47,8 @@ unsigned long rds_sysctl_reconnect_max_jiffies = HZ;
 unsigned int  rds_sysctl_max_unacked_packets = 16;
 unsigned int  rds_sysctl_max_unacked_bytes = (16 << 20);
 
+unsigned int rds_sysctl_ping_enable = 1;
+
 /* 
  * These can change over time until they're official.  Until that time we'll
  * give apps a way to figure out what the values are in a given machine.
@@ -107,6 +109,14 @@ static ctl_table rds_sysctl_rds_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .ctl_name       = 10,
+               .procname       = "ping_enable",
+               .data           = &rds_sysctl_ping_enable,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
        /* 100+ are reserved for transport subdirs */
        { .ctl_name = 0}
 };
-- 
1.5.4.rc3


-- 
Olaf Kirch  |  --- o --- Nous sommes du soleil we love when we play
[EMAIL PROTECTED] |    / | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to