在 2026/3/6 17:10, David Ahern 写道:
On 3/6/26 1:24 AM, Zhu Yanjun wrote:
diff --git a/drivers/infiniband/sw/rxe/rxe_ns.c b/drivers/infiniband/sw/rxe/rxe_ns.c
new file mode 100644
index 000000000000..29d08899dcda
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_ns.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
+ */
+
+#include <net/sock.h>
+#include <net/netns/generic.h>
+#include <net/net_namespace.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/pid_namespace.h>
+#include <net/udp_tunnel.h>
+
+#include "rxe_ns.h"
+
+/*
+ * Per network namespace data
+ */
+struct rxe_ns_sock {
+       struct sock __rcu *rxe_sk4;
+       struct sock __rcu *rxe_sk6;
+};
+
+/*
+ * Index to store custom data for each network namespace.
+ */
+static unsigned int rxe_pernet_id;
+
+/*
+ * Called for every existing and added network namespaces
+ */
+static int __net_init rxe_ns_init(struct net *net)
+{
+       /*
+        * create (if not present) and access data item in network namespace
+        * (net) using the id (net_id)
+        */
This comment is not needed; it does not really convey anything useful. I
would like this function to have the comment from my patch:

        /* defer socket create in the namespace to the first
         * device create.
         */

this makes it clear why init and exit are not symmetrical.

+       struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+
+       rcu_assign_pointer(ns_sk->rxe_sk4, NULL); /* initialize sock 4 socket */
+       rcu_assign_pointer(ns_sk->rxe_sk6, NULL); /* initialize sock 6 socket */
+       synchronize_rcu();
I believe the core network namespace code ensures the memory is
initialized, so this is not needed.

+
+       return 0;
+}
+
+static void __net_exit rxe_ns_exit(struct net *net)
+{
+       /*
+        * called when the network namespace is removed
+        */
+       struct rxe_ns_sock *ns_sk = net_generic(net, rxe_pernet_id);
+       struct sock *rxe_sk4 = NULL;
+       struct sock *rxe_sk6 = NULL;
initialization is not needed since both are set before use.

+
+       rcu_read_lock();
+       rxe_sk4 = rcu_dereference(ns_sk->rxe_sk4);
+       rxe_sk6 = rcu_dereference(ns_sk->rxe_sk6);
+       rcu_read_unlock();
+
+       /* close socket */
+       if (rxe_sk4 && rxe_sk4->sk_socket) {
how can rxe_sk4 be non-NULL and yet sk_socket become NULL?

+               udp_tunnel_sock_release(rxe_sk4->sk_socket);
+               rcu_assign_pointer(ns_sk->rxe_sk4, NULL);
if you flip the order

                rcu_assign_pointer(ns_sk->rxe_sk4, NULL);
                /* udp_tunnel_sock_release calls synchronize_rcu */
                udp_tunnel_sock_release(rxe_sk4->sk_socket);


you should be able to drop the synchronize_rcu here:

+               synchronize_rcu();
+       }
+
+       if (rxe_sk6 && rxe_sk6->sk_socket) {
same here.

All the mentioned problems are fixed in the latest commit.

Zhu Yanjun


+               udp_tunnel_sock_release(rxe_sk6->sk_socket);
+               rcu_assign_pointer(ns_sk->rxe_sk6, NULL);
+               synchronize_rcu();
+       }
+}
+

--
Best Regards,
Yanjun.Zhu


Reply via email to