On Thu, May 31, 2012 at 3:35 AM, Alex Elder <[email protected]> wrote:
> Start explicitly keeping track of the state of a ceph connection's
> socket, separate from the state of the connection itself. Create
> placeholder functions to encapsulate the state transitions.
>
> --------
> | NEW* | transient initial state
> --------
> | con_sock_state_init()
> v
> ----------
> | CLOSED | initialized, but no socket (and no
> ---------- TCP connection)
> ^ \
> | \ con_sock_state_connecting()
> | ----------------------
> | \
> + con_sock_state_closed() \
> |\ \
> | \ \
> | ----------- \
> | | CLOSING | socket event; \
> | ----------- await close \
> | ^ |
> | | |
> | + con_sock_state_closing() |
> | / \ |
> | / --------------- |
> | / \ v
> | / --------------
> | / -----------------| CONNECTING | socket created, TCP
> | | / -------------- connect initiated
> | | | con_sock_state_connected()
> | | v
> -------------
> | CONNECTED | TCP connection established
> -------------
>
> Make the socket state an atomic variable, reinforcing that it's a
> distinct transtion with no possible "intermediate/both" states.
> This is almost certainly overkill at this point, though the
> transitions into CONNECTED and CLOSING state do get called via
> socket callback (the rest of the transitions occur with the
> connection mutex held). We can back out the atomicity later.
>
> Signed-off-by: Alex Elder <[email protected]>
> ---
> include/linux/ceph/messenger.h | 8 ++++-
> net/ceph/messenger.c | 63
> ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 69 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> index 920235e..5e852f4 100644
> --- a/include/linux/ceph/messenger.h
> +++ b/include/linux/ceph/messenger.h
> @@ -137,14 +137,18 @@ struct ceph_connection {
> const struct ceph_connection_operations *ops;
>
> struct ceph_messenger *msgr;
> +
> + atomic_t sock_state;
> struct socket *sock;
> + struct ceph_entity_addr peer_addr; /* peer address */
> + struct ceph_entity_addr peer_addr_for_me;
> +
> unsigned long flags;
> unsigned long state;
> const char *error_msg; /* error message, if any */
>
> - struct ceph_entity_addr peer_addr; /* peer address */
> struct ceph_entity_name peer_name; /* peer name */
> - struct ceph_entity_addr peer_addr_for_me;
> +
> unsigned peer_features;
> u32 connect_seq; /* identify the most recent connection
> attempt for this connection, client */
> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> index 29055df..7e11b07 100644
> --- a/net/ceph/messenger.c
> +++ b/net/ceph/messenger.c
> @@ -29,6 +29,14 @@
> * the sender.
> */
>
> +/* State values for ceph_connection->sock_state; NEW is assumed to be 0 */
> +
> +#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
> +#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
> +#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or ->
> CLOSING */
> +#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED
> */
> +#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
> +
> /* static tag bytes (protocol control messages) */
> static char tag_msg = CEPH_MSGR_TAG_MSG;
> static char tag_ack = CEPH_MSGR_TAG_ACK;
> @@ -147,6 +155,54 @@ void ceph_msgr_flush(void)
> }
> EXPORT_SYMBOL(ceph_msgr_flush);
>
> +/* Connection socket state transition functions */
> +
> +static void con_sock_state_init(struct ceph_connection *con)
> +{
> + int old_state;
> +
> + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
> + if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
> + printk("%s: unexpected old state %d\n", __func__,
> old_state);
> +}
> +
> +static void con_sock_state_connecting(struct ceph_connection *con)
> +{
> + int old_state;
> +
> + old_state = atomic_xchg(&con->sock_state,
> CON_SOCK_STATE_CONNECTING);
> + if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
> + printk("%s: unexpected old state %d\n", __func__,
> old_state);
> +}
> +
> +static void con_sock_state_connected(struct ceph_connection *con)
> +{
> + int old_state;
> +
> + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
> + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
> + printk("%s: unexpected old state %d\n", __func__,
> old_state);
> +}
> +
> +static void con_sock_state_closing(struct ceph_connection *con)
> +{
> + int old_state;
> +
> + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
> + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
> + old_state != CON_SOCK_STATE_CONNECTED))
> + printk("%s: unexpected old state %d\n", __func__,
> old_state);
> +}
> +
> +static void con_sock_state_closed(struct ceph_connection *con)
> +{
> + int old_state;
> +
> + old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
> + if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
> + old_state != CON_SOCK_STATE_CLOSING))
> + printk("%s: unexpected old state %d\n", __func__,
> old_state);
> +}
>
Hi Alex,
Single node setup can easily trigger above WARN_ON. I think this is because
local TCP connection setup and shutdown is very fast, they finish immediately
after sock->connect and sock->shutdown return.
Yan, Zheng
> /*
> * socket callback functions
> @@ -203,6 +259,7 @@ static void ceph_sock_state_change(struct sock *sk)
> dout("%s TCP_CLOSE\n", __func__);
> case TCP_CLOSE_WAIT:
> dout("%s TCP_CLOSE_WAIT\n", __func__);
> + con_sock_state_closing(con);
> if (test_and_set_bit(SOCK_CLOSED, &con->flags) == 0) {
> if (test_bit(CONNECTING, &con->state))
> con->error_msg = "connection failed";
> @@ -213,6 +270,7 @@ static void ceph_sock_state_change(struct sock *sk)
> break;
> case TCP_ESTABLISHED:
> dout("%s TCP_ESTABLISHED\n", __func__);
> + con_sock_state_connected(con);
> queue_con(con);
> break;
> default: /* Everything else is uninteresting */
> @@ -277,6 +335,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
> return ret;
> }
> con->sock = sock;
> + con_sock_state_connecting(con);
>
> return 0;
> }
> @@ -341,6 +400,7 @@ static int con_close_socket(struct ceph_connection *con)
> rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
> sock_release(con->sock);
> con->sock = NULL;
> + con_sock_state_closed(con);
> return rc;
> }
>
> @@ -460,6 +520,9 @@ void ceph_con_init(struct ceph_messenger *msgr, struct
> ceph_connection *con)
> memset(con, 0, sizeof(*con));
> atomic_set(&con->nref, 1);
> con->msgr = msgr;
> +
> + con_sock_state_init(con);
> +
> mutex_init(&con->mutex);
> INIT_LIST_HEAD(&con->out_queue);
> INIT_LIST_HEAD(&con->out_sent);
> --
> 1.7.5.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html