Jim,
I just ran with 1.2 and hit the same bug. I've included the debug msgs leading up to the oops (at the bottom). I think the problem has to do with handling a connection request after a socket has been destroyed. The failed instance of sdp_connect_handler() doesn't appear to run sdp_init_qp() so I assume that it fails somewhere before that.

I wonder if event->param.conn.private_data is bogus?

Thanks for your help.
Paul


int sdp_connect_handler(struct sock *sk, struct rdma_cm_id *id,
                       struct rdma_cm_event *event)
{
       struct sockaddr_in *dst_addr;
       struct sock *child;
       const struct sdp_hh *h;
       int rc;

       sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

       h = event->param.conn.private_data;

       if (!h->max_adverts)
               return -EINVAL;

       child = sk_clone(sk, GFP_KERNEL);
       if (!child)
               return -ENOMEM;

       sdp_add_sock(sdp_sk(child));
       INIT_LIST_HEAD(&sdp_sk(child)->accept_queue);
       INIT_LIST_HEAD(&sdp_sk(child)->backlog_queue);
INIT_DELAYED_WORK(&sdp_sk(child)->time_wait_work, sdp_time_wait_work);
       INIT_WORK(&sdp_sk(child)->destroy_work, sdp_destroy_work);

       dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
       inet_sk(child)->dport = dst_addr->sin_port;
       inet_sk(child)->daddr = dst_addr->sin_addr.s_addr;

       bh_unlock_sock(child);
       __sock_put(child);

       rc = sdp_init_qp(child, id);
...

################# Console Msgs ###########################################

oss08p login:
Fedora Core release 3 (Heidelberg)
Kernel 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 on an x86_64

oss08p login: Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_cma_handler event 4 id 000001015ed2b600 Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): RDMA_CM_EVENT_CONNECT_REQUEST Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_connect_handler 000001015ed30c00 -> 000001015ed2b600
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_init_qp
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_init_qp done
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_connect_handler bufs 64 xmit_size_goal 32768 Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22 Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_cma_handler event 4 handled
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): event 4 done. status 0
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept: error -11
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept: status -11 sk 000001014e790780 newsk 0000000000000000 Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept: error -11
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept: status -11 sk 000001014e790780 newsk 0000000000000000 Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_cma_handler event 9 id 000001015ed2b600
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): RDMA_CM_EVENT_ESTABLISHED
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_connected_handler
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_connected_handler child connection established Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_cma_handler event 9 handled
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): event 9 done. status 0
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22 Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_accept: ib_req_notify_cq Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept: status -22 sk 000001014e790780 newsk 000001014e790040 Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept: error -11
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:0): sdp_accept: status -11 sk 000001014e790780 newsk 0000000000000000
Jul 30 16:08:22 oss08p kernel: sdp_sock(988:1023): sdp_setsockopt

Fedora Core release 3 (Heidelberg)
Kernel 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 on an x86_64

oss08p login:
Fedora Core release 3 (Heidelberg)
Kernel 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 on an x86_64

Fedora Core release 3 (Heidelberg)
Kernel 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 on an x86_64

oss08p login:
Fedora Core release 3 (Heidelberg)
Kernel 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 on an x86_64

oss08p login: Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_cma_handler event 4 id 000001015ee0c800 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): RDMA_CM_EVENT_CONNECT_REQUEST Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_connect_handler 000001015ed30c00 -> 000001015ee0c800
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_init_qp
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_init_qp done
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_connect_handler bufs 64 xmit_size_goal 32768 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_cma_handler event 4 handled
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): event 4 done. status 0
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept: error -11
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept: status -11 sk 000001014e790780 newsk 0000000000000000 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept: error -11
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept: status -11 sk 000001014e790780 newsk 0000000000000000 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_cma_handler event 9 id 000001015ee0c800 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): RDMA_CM_EVENT_ESTABLISHED
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_connected_handler
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_connected_handler child connection established Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_cma_handler event 9 handled
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): event 9 done. status 0
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_accept: ib_req_notify_cq Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept: status -22 sk 000001014e790780 newsk 0000010151c457c0 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept state 10 expected 10 *err -22
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept: error -11
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:0): sdp_accept: status -11 sk 000001014e790780 newsk 0000000000000000
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_setsockopt
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_fin
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_close
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_close: entering time wait refcnt 2 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_close: last socket put 2
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_unhash
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_handle_wc: destroy in time wait state Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_cma_handler event 10 id 000001015ee0c800 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): RDMA_CM_EVENT_DISCONNECTED Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_destroy_work: refcnt 1
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_destruct
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_close_sk
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_disconnected_handler
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_cma_handler event 10 handled
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_reset_sk
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): event 10 done. status -104 Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_close_sk done; releasing sock
Jul 30 16:09:40 oss08p kernel: sdp_sock(988:32768): sdp_destruct done

Fedora Core release 3 (Heidelberg)
Kernel 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 on an x86_64

oss08p login:
Fedora Core release 3 (Heidelberg)
Kernel 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 on an x86_64

oss08p login:
Kernel BUG at sdp_cma:372
invalid operand: 0000 [1] SMP
CPU 1
Modules linked in: ksocklnd ptlrpc obdclass lvfs lnet libcfs ib_ipoib ib_srp ib_sdp rdma_cm ib_addr iw_cm ib_local_sa ib_cm iptable_filter i
p_tables e1000 ib_sa ib_uverbs ib_umad ib_mthca ib_mad ib_core md forcedeth
Pid: 2362, comm: ib_cm/1 Not tainted 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 RIP: 0010:[<ffffffffa00abbbf>] <ffffffffa00abbbf>{:ib_sdp:sdp_connect_handler+207}
RSP: 0018:0000010155f21bc8  EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000010150e45800 RCX: 0000000000000000
RDX: 00000000ffffffff RSI: 0000010150e450a8 RDI: ffffffffa00b3640
RBP: ffffffffa00b3140 R08: 0000010155ef8ef8 R09: 0000010155ef8f08
R10: 00000000ffffffff R11: 0000000000000000 R12: 000001014e790780
R13: 0000010155f21cf8 R14: 0000000000000000 R15: 000001015681bfa4
FS:  0000002a9589db00(0000) GS:ffffffff805a3140(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000002a986adad8 CR3: 000000007ea38000 CR4: 00000000000006e0
Process ib_cm/1 (pid: 2362, threadinfo 0000010155f20000, task 00000101565897e0)
Stack: 000001015ede2400 000001014e7909e0 000001014e790780 000001015ede2400
      0000010155f21cf8 0000000000000000 0000000000000000 ffffffffa00ac581
      000001015ede2400 000001015ede2458
Call Trace:<ffffffffa00ac581>{:ib_sdp:sdp_cma_handler+945} <ffffffffa00a0347>{:rdma_cm:cma_acquire_dev+359} <ffffffffa00a1488>{:rdma_cm:cma_req_handler+1000} <ffffffffa006859a>{:ib_cm:cm_process_work+26} <ffffffffa006901f>{:ib_cm:cm_req_handler+2463} <ffffffffa0069490>{:ib_cm:cm_work_handler+0} <ffffffffa00694d2>{:ib_cm:cm_work_handler+66} <ffffffff80133133>{__wake_up+67} <ffffffffa0069490>{:ib_cm:cm_work_handler+0} <ffffffff80149110>{worker_thread+496} <ffffffff80133070>{default_wake_function+0} <ffffffff801330c0>{__wake_up_common+64} <ffffffff80133070>{default_wake_function+0} <ffffffff8014d630>{keventd_create_kthread+0} <ffffffff80148f20>{worker_thread+0} <ffffffff8014d630>{keventd_create_kthread+0}
      <ffffffff8014d5e9>{kthread+217} <ffffffff8011144b>{child_rip+8}
<ffffffff8014d630>{keventd_create_kthread+0} <ffffffff8014d510>{kthread+0}
      <ffffffff80111443>{child_rip+0}

Code: 0f 0b 01 f0 0a a0 ff ff ff ff 74 01 66 66 90 66 90 65 8b 04
RIP <ffffffffa00abbbf>{:ib_sdp:sdp_connect_handler+207} RSP <0000010155f21bc8> <0>Kernel panic Jul 30 16:10:11 -oss08p kernel: sdp_sock(988:0): sdp_cma_handler event 4 id 000001015ede2400 Jul 30 16:10:11 oss08p kernel: sdp_sock(988:0): RDMA_CM_EVENT_CONNECT_REQUEST Jul 30 16:10:11 oss08p kernel: sdp_sock(988:0): sdp_connect_handler 000001015ed30c00 -> 000001015ede 2400 Jul 30 16:10:11 oss08p kernel: ----------- [cut here ] --------- [please bite here ] ---------
Jul 30 16:10:12 oss08p kernel: Kernel BUG at sdp_cma:372
Jul 30 16:10:12 oss08p kernel: invalid operand: 0000 [1] SMP
Jul 30 16:10:12 oss08p kernel: CPU 1
Jul 30 16:n10:12 oss08p kernel: Modules linked in: ksocklnd ptlrpc obdclass lvfs lnet libcfs ib_ipoib ib_srp ib_sdp rdma_cm ib_addr iw_cm ib _local_sa ib_cm iptable_filter ip_tables e1000 ib_sa ib_uverbs ib_umad ib_mthca ib_mad ib_core md forcedeth Jul 30 16:10:12 osos08p kernel: Pid: 2362, comm: ib_cm/1 Not tainted 2.6.9-42.0.8.EL_lustre.1.4.9.1customOBost_0.4 Jul 30 16:10:12 oss08p kernel: RIP: 0010:[<ffffffffa00abbbf>] <ffffffffa00abbbf>{:ib_sdp:sdp_connect_handler+207}
Jul 30 16:10:12 oss08p kernel: RSP: 0018:0000010155f21bc8  EtFLAGS: 00010246
Jul 30 16:10:12 oss08p kernel: RAX: 0000000000000000 RBX: 0000010150e45800 RCX: 0000000000000000 Jul 30 16:10:12 oss08p kernel: RDX: 00000000ffffffff RSI: 0000010150e450a8 RDI: ffffffffa00b3640 Jul 30 16:10:12 oss08p kernel: RBP: fffffff fa00b3140 R08: 0000010155ef8ef8 R09: 0000010155ef8f08 Jul 30 16:10:12 oss08p kernel: R10: 00000000ffffffff R11: 0000000000000000 R12: 000001014e790780 Jul 30 16:10:12 oss08p kernel: R13: 0000010155f21cf8 R14: 0000000000000000 R15: 000001015681bfa4 Jul 30 16:10:12 oss08sp kernel: FS: 0000002a9589db00(0000) GS:ffffffff805a3140(0000) knlGS:0000000000000000 Jul 30 16:10:12 oss08p kernel: CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b Jul 30 16:10:12 oss08p kernel: CR2: 0000002a986adad8 CR3: 000000007ea38000 CR4: 00000000000006e0 Jul y30 16:10:12 oss08p kernel: Process ib_cm/1 (pid: 2362, threadinfo 0000010155f20000, task 00000101565897e0) Jul 30 16:10:12 oss08p kernel: Stack: 000001015ede2400 000001014e7909e0 000001014e790780 000001015ede2400 Jul 30 16:10:12 oss08p kernel: 00n00010155f21cf8 0000000000000000 0000000000000000 ffffffffa00ac581
Jul 30 16:10:12 oss08p kernel:        000001015ede2400 000001015ede2458
Jul 30 16:10:12 oss08p kernel: Call Trace:<ffffffffa00ac581>{:ib_sdp:sdp_cma_handler+945} <ffffffffa00a0347>{:rdma_cm:cma_acquire_cdev+359} Jul 30 16:10:12 oss08p kernel: <ffffffffa00a1488>{:rdma_cm:cma_req_handler+1000} <ffffffffa006859a>{:ib_cm:cm_process_work+26} Jul 30 16:10:12 oss08p kernel: <ffffffffa006901f>{:ib_cm:cm_req_handler+2463} <ffffffffa0069490>{:ib_cim:cm_work_handler+0} Jul 30 16:10:12 oss08p kernel: <ffffffffa00694d2>{:ib_cm:cm_work_handler+66} <ffffffff80133133>{__wake_up+67} Jul 30 16:10:12 oss08p kernel: <ffffffffa0069490>{:ib_cm:cm_work_handler+0} <ffffffff80149110>{worker_thread+496} Jul 30 n16:10:12 oss08p kernel: <ffffffff80133070>{default_wake_function+0} <ffffffff801330c0>{__wake_up_common+64} Jul 30 16:10:12 oss08p kernel: <ffffffff80133070>{default_wake_function+0} <ffffffff8014d630>{keventd_create_kthread+0} Jul 30 16:g10:12 oss08p kernel: <ffffffff80148f20>{worker_thread+0} <ffffffff8014d630>{keventd_create_kthread+0} Jul 30 16:10:12 oss08p kernel: <ffffffff8014d5e9>{kthread+217} <ffffffff8011144b>{child_rip+8} Jul 30 16:10:12 oss08p kernel: <ffffffff8014d630>{:keventd_create_kthread+0} <ffffffff8014d510>{kthread+0}
Jul 30 16:10:12 oss08p kernel:        <ffffffff80111443>{child_rip+0}
Jul 30 16:10:12 oss08p kernel:
Jul 30 16:10:12 oss08p kernel: Code: 0f 0b 01 f0 0a a0 ff ff ff ff 74 01 66 66 90 66 90 65 8b 04 Jul 30 16:10:12 oss08p kernel: RIP <ffffffffa00abbbf>{:ib_sdp:sdp_connect_handler+207} RSP <0000010155f21bc8>
Jul 30 16:10:12 oss08p kernel:  <0>Kernel panic - not syncing: Oops
Oops

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to