在 2026/3/6 17:10, David Ahern 写道:
On 3/6/26 1:24 AM, Zhu Yanjun wrote:
diff --git a/drivers/infiniband/sw/rxe/rxe_ns.c b/drivers/infiniband/sw/rxe/rxe_ns.c
new file mode 100644
index 000000000000..29d08899dcda
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_ns.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ */
+
+#include <net/sock.h>
+#include <net/netns/generic.h>
+#include <net/net_namespace.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/pid_namespace.h>
+#include <net/udp_tunnel.h>
+
+#include "rxe_ns.h"
+
+/*
+ * Per network namespace data
+ */
+struct rxe_ns_sock {
+ struct sock __rcu *rxe_sk4;
+ struct sock __rcu *rxe_sk6;
+};
+
+/*
+ * Index to store custom data for each network namespace.
+ */
+static unsigned int rxe_pernet_id;
+
+/*
+ * Called for every existing and added network namespaces
+ */
+static int __net_init rxe_ns_init(struct net *net)
+{
+ /*
+ * create (if not present) and access data item in network namespace
+ * (net) using the id (net_id)
+ */
This comment is not needed; it does not really convey anything useful. I
would like this function to have the comment from my patch instead:
/* defer socket create in the namespace to the first
* device create.
*/
this makes it clear why init and exit are not symmetrical.
+ struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+
+ rcu_assign_pointer(ns_sk->rxe_sk4, NULL); /* initialize sock 4 socket */
+ rcu_assign_pointer(ns_sk->rxe_sk6, NULL); /* initialize sock 6 socket */
+ synchronize_rcu();
I believe the core network namespace code ensures the memory is
initialized, so this is not needed.
+
+ return 0;
+}
+
+static void __net_exit rxe_ns_exit(struct net *net)
+{
+ /*
+ * called when the network namespace is removed
+ */
+ struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+ struct sock *rxe_sk4 = NULL;
+ struct sock *rxe_sk6 = NULL;
initialization is not needed since both are set before use.
+
+ rcu_read_lock();
+ rxe_sk4 = rcu_dereference(ns_sk->rxe_sk4);
+ rxe_sk6 = rcu_dereference(ns_sk->rxe_sk6);
+ rcu_read_unlock();
+
+ /* close socket */
+ if (rxe_sk4 && rxe_sk4->sk_socket) {
how can rxe_sk4 be non-NULL and yet sk_socket become NULL?
+ udp_tunnel_sock_release(rxe_sk4->sk_socket);
+ rcu_assign_pointer(ns_sk->rxe_sk4, NULL);
if you flip the order
rcu_assign_pointer(ns_sk->rxe_sk4, NULL);
/* udp_tunnel_sock_release calls synchronize_rcu */
udp_tunnel_sock_release(rxe_sk4->sk_socket);
you should be able to drop the synchronize_rcu here:
+ synchronize_rcu();
+ }
+
+ if (rxe_sk6 && rxe_sk6->sk_socket) {
same here.
All of the problems mentioned above are fixed in the latest commit.
Zhu Yanjun
+ udp_tunnel_sock_release(rxe_sk6->sk_socket);
+ rcu_assign_pointer(ns_sk->rxe_sk6, NULL);
+ synchronize_rcu();
+ }
+}
+
--
Best Regards,
Yanjun.Zhu