Re: [PATCH net-next 1/3] net/tls: use RCU protection on icsk->icsk_ulp_data

2019-08-19 Thread Davide Caratti
On Thu, 2019-08-15 at 14:32 -0700, Jakub Kicinski wrote:
> On Thu, 15 Aug 2019 18:00:42 +0200, Davide Caratti wrote:
> > From: Jakub Kicinski 
> > 
> > We need to make sure context does not get freed while diag
> > code is interrogating it. Free struct tls_context with
> > kfree_rcu().
> > 
> > We add the __rcu annotation directly in icsk, and cast it
> > away in the datapath accessor. Presumably all ULPs will
> > do a similar thing.
> > 
> > Signed-off-by: Jakub Kicinski 

hello Jakub,

> > @@ -251,14 +251,31 @@ static void tls_write_space(struct sock *sk)
> > ctx->sk_write_space(sk);
> >  }
> >  
> > -void tls_ctx_free(struct tls_context *ctx)
> > +/**
> > + * tls_ctx_free() - free TLS ULP context
> > + * @sk:  socket to with @ctx is attached
> > + * @ctx: TLS context structure
> > + *
> > + * Free TLS context. If @sk is %NULL caller guarantees that the socket
> > + * to which @ctx was attached has no outstanding references.
> > + */
> > +void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
> >  {
> > +   struct inet_connection_sock *icsk;
> > +
> > if (!ctx)
> > return;
> >  
> > memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
> > memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
> > -   kfree(ctx);
> > +
> > +   if (sk) {
> > +   icsk = inet_csk(sk);
> > +   rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
> 
> Now that we kind of want to set the icsk_ulp_data to NULL under the
> callback_lock I think we should let the callers do it.

Ok, I will fix this in series v2.

> > 
> > @@ -649,8 +666,8 @@ static void tls_hw_sk_destruct(struct sock *sk)
> >  
> > ctx->sk_destruct(sk);
> > /* Free ctx */
> > -   tls_ctx_free(ctx);
> > -   icsk->icsk_ulp_data = NULL;
> > +   tls_ctx_free(sk, ctx);
> > +   rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
> 
> Let's reorder the assignment before the free.

Ok, I will fix this in series v2.

thanks!
-- 
davide




Re: [PATCH net-next 1/3] net/tls: use RCU protection on icsk->icsk_ulp_data

2019-08-15 Thread Jakub Kicinski
On Thu, 15 Aug 2019 18:00:42 +0200, Davide Caratti wrote:
> From: Jakub Kicinski 
> 
> We need to make sure context does not get freed while diag
> code is interrogating it. Free struct tls_context with
> kfree_rcu().
> 
> We add the __rcu annotation directly in icsk, and cast it
> away in the datapath accessor. Presumably all ULPs will
> do a similar thing.
> 
> Signed-off-by: Jakub Kicinski 
> ---
>  include/net/inet_connection_sock.h |  2 +-
>  include/net/tls.h  |  9 +++--
>  net/core/sock_map.c|  2 +-
>  net/tls/tls_device.c   |  2 +-
>  net/tls/tls_main.c | 31 +++---
>  5 files changed, 34 insertions(+), 12 deletions(-)
> 
> diff --git a/include/net/inet_connection_sock.h 
> b/include/net/inet_connection_sock.h
> index c57d53e7e02c..895546058a20 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -97,7 +97,7 @@ struct inet_connection_sock {
>   const struct tcp_congestion_ops *icsk_ca_ops;
>   const struct inet_connection_sock_af_ops *icsk_af_ops;
>   const struct tcp_ulp_ops  *icsk_ulp_ops;
> - void  *icsk_ulp_data;
> + void __rcu*icsk_ulp_data;
>   void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
>   struct hlist_node icsk_listen_portaddr_node;
>   unsigned int  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
> diff --git a/include/net/tls.h b/include/net/tls.h
> index 41b2d41bb1b8..4997742475cd 100644
> --- a/include/net/tls.h
> +++ b/include/net/tls.h
> @@ -41,6 +41,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -290,6 +291,7 @@ struct tls_context {
>  
>   struct list_head list;
>   refcount_t refcount;
> + struct rcu_head rcu;
>  };
>  
>  enum tls_offload_ctx_dir {
> @@ -348,7 +350,7 @@ struct tls_offload_context_rx {
>  #define TLS_OFFLOAD_CONTEXT_SIZE_RX  \
>   (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
>  
> -void tls_ctx_free(struct tls_context *ctx);
> +void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
>  int wait_on_pending_writer(struct sock *sk, long *timeo);
>  int tls_sk_query(struct sock *sk, int optname, char __user *optval,
>   int __user *optlen);
> @@ -467,7 +469,10 @@ static inline struct tls_context *tls_get_ctx(const 
> struct sock *sk)
>  {
>   struct inet_connection_sock *icsk = inet_csk(sk);
>  
> - return icsk->icsk_ulp_data;
> + /* Use RCU on icsk_ulp_data only for sock diag code,
> +  * TLS data path doesn't need rcu_dereference().
> +  */
> + return (__force void *)icsk->icsk_ulp_data;
>  }
>  
>  static inline void tls_advance_record_sn(struct sock *sk,
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index 1330a7442e5b..01998860afaa 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, 
> u32 idx,
>   return -EINVAL;
>   if (unlikely(idx >= map->max_entries))
>   return -E2BIG;
> - if (unlikely(icsk->icsk_ulp_data))
> + if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
>   return -EINVAL;
>  
>   link = sk_psock_init_link();
> diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
> index d184230665eb..436df5b4bb60 100644
> --- a/net/tls/tls_device.c
> +++ b/net/tls/tls_device.c
> @@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx)
>   if (ctx->rx_conf == TLS_HW)
>   kfree(tls_offload_ctx_rx(ctx));
>  
> - tls_ctx_free(ctx);
> + tls_ctx_free(NULL, ctx);
>  }
>  
>  static void tls_device_gc_task(struct work_struct *work)
> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> index 9cbbae606ced..04829bef514c 100644
> --- a/net/tls/tls_main.c
> +++ b/net/tls/tls_main.c
> @@ -251,14 +251,31 @@ static void tls_write_space(struct sock *sk)
>   ctx->sk_write_space(sk);
>  }
>  
> -void tls_ctx_free(struct tls_context *ctx)
> +/**
> + * tls_ctx_free() - free TLS ULP context
> + * @sk:  socket to with @ctx is attached
> + * @ctx: TLS context structure
> + *
> + * Free TLS context. If @sk is %NULL caller guarantees that the socket
> + * to which @ctx was attached has no outstanding references.
> + */
> +void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
>  {
> + struct inet_connection_sock *icsk;
> +
>   if (!ctx)
>   return;
>  
>   memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
>   memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
> - kfree(ctx);
> +
> + if (sk) {
> + icsk = inet_csk(sk);
> + rcu_assign_pointer(icsk->icsk_ulp_data, NULL);

Now that we kind of want to set the icsk_ulp_data to NULL under the
callback_lock I think we should let the callers do it.

> + kfree_rcu(ct

[PATCH net-next 1/3] net/tls: use RCU protection on icsk->icsk_ulp_data

2019-08-15 Thread Davide Caratti
From: Jakub Kicinski 

We need to make sure context does not get freed while diag
code is interrogating it. Free struct tls_context with
kfree_rcu().

We add the __rcu annotation directly in icsk, and cast it
away in the datapath accessor. Presumably all ULPs will
do a similar thing.

Signed-off-by: Jakub Kicinski 
---
 include/net/inet_connection_sock.h |  2 +-
 include/net/tls.h  |  9 +++--
 net/core/sock_map.c|  2 +-
 net/tls/tls_device.c   |  2 +-
 net/tls/tls_main.c | 31 +++---
 5 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/include/net/inet_connection_sock.h 
b/include/net/inet_connection_sock.h
index c57d53e7e02c..895546058a20 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -97,7 +97,7 @@ struct inet_connection_sock {
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
const struct tcp_ulp_ops  *icsk_ulp_ops;
-   void  *icsk_ulp_data;
+   void __rcu*icsk_ulp_data;
void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
struct hlist_node icsk_listen_portaddr_node;
unsigned int  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
diff --git a/include/net/tls.h b/include/net/tls.h
index 41b2d41bb1b8..4997742475cd 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -41,6 +41,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -290,6 +291,7 @@ struct tls_context {
 
struct list_head list;
refcount_t refcount;
+   struct rcu_head rcu;
 };
 
 enum tls_offload_ctx_dir {
@@ -348,7 +350,7 @@ struct tls_offload_context_rx {
 #define TLS_OFFLOAD_CONTEXT_SIZE_RX\
(sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
 
-void tls_ctx_free(struct tls_context *ctx);
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
int __user *optlen);
@@ -467,7 +469,10 @@ static inline struct tls_context *tls_get_ctx(const struct 
sock *sk)
 {
struct inet_connection_sock *icsk = inet_csk(sk);
 
-   return icsk->icsk_ulp_data;
+   /* Use RCU on icsk_ulp_data only for sock diag code,
+* TLS data path doesn't need rcu_dereference().
+*/
+   return (__force void *)icsk->icsk_ulp_data;
 }
 
 static inline void tls_advance_record_sn(struct sock *sk,
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1330a7442e5b..01998860afaa 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 
idx,
return -EINVAL;
if (unlikely(idx >= map->max_entries))
return -E2BIG;
-   if (unlikely(icsk->icsk_ulp_data))
+   if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
return -EINVAL;
 
link = sk_psock_init_link();
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index d184230665eb..436df5b4bb60 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx)
if (ctx->rx_conf == TLS_HW)
kfree(tls_offload_ctx_rx(ctx));
 
-   tls_ctx_free(ctx);
+   tls_ctx_free(NULL, ctx);
 }
 
 static void tls_device_gc_task(struct work_struct *work)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 9cbbae606ced..04829bef514c 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -251,14 +251,31 @@ static void tls_write_space(struct sock *sk)
ctx->sk_write_space(sk);
 }
 
-void tls_ctx_free(struct tls_context *ctx)
+/**
+ * tls_ctx_free() - free TLS ULP context
+ * @sk:  socket to with @ctx is attached
+ * @ctx: TLS context structure
+ *
+ * Free TLS context. If @sk is %NULL caller guarantees that the socket
+ * to which @ctx was attached has no outstanding references.
+ */
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
 {
+   struct inet_connection_sock *icsk;
+
if (!ctx)
return;
 
memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
-   kfree(ctx);
+
+   if (sk) {
+   icsk = inet_csk(sk);
+   rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+   kfree_rcu(ctx, rcu);
+   } else {
+   kfree(ctx);
+   }
 }
 
 static void tls_sk_proto_cleanup(struct sock *sk,
@@ -306,7 +323,7 @@ static void tls_sk_proto_close(struct sock *sk, long 
timeout)
 
write_lock_bh(&sk->sk_callback_lock);
if (free_ctx)
-   icsk->icsk_ulp_data = NULL;
+   rcu_assign_pointer(icsk->icsk_ulp_