Re: [PATCH net-next v20 12/25] ovpn: implement TCP transport

2025-03-03 Thread Antonio Quartulli

On 03/03/2025 16:08, Sabrina Dubroca wrote:

2025-02-27, 02:21:37 +0100, Antonio Quartulli wrote:

@@ -94,11 +96,23 @@ void ovpn_socket_release(struct ovpn_peer *peer)
 * detached before it can be picked by a concurrent reader.
 */
lock_sock(sock->sock->sk);
-   ovpn_socket_put(peer, sock);
+   released = ovpn_socket_put(peer, sock);
release_sock(sock->sock->sk);
  
  	/* align all readers with sk_user_data being NULL */

synchronize_rcu();
+
+   /* following cleanup should happen with lock released */
+   if (released) {
+   if (sock->sock->sk->sk_protocol == IPPROTO_UDP) {
+   netdev_put(sock->ovpn->dev, &sock->dev_tracker);
+   } else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) {
+   /* wait for TCP jobs to terminate */
+   ovpn_tcp_socket_wait_finish(sock);
+   ovpn_peer_put(sock->peer);
+   }
+   kfree_rcu(sock, rcu);


kfree_rcu after synchronize_rcu is a bit unexpected. Do we still need
to wait before we free sock, now that we have synchronize_rcu before?


Ah good point. Waiting one RCU period is what kfree_rcu() is there for, 
hence we could just call kfree() at this point.





+   }
  }
  





+static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
+{
+   struct strp_msg *rxm = strp_msg(skb);
+   __be16 blen;
+   u16 len;
+   int err;
+
+   /* when packets are written to the TCP stream, they are prepended with
+* two bytes indicating the actual packet size.
+* Here we read those two bytes and move the skb data pointer to the
+* beginning of the packet


There's no update to skb->data being done in ovpn_tcp_parse AFAICT.


I concur :)



[...]

+static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
+{
+   struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
+   struct strp_msg *msg = strp_msg(skb);
+   size_t pkt_len = msg->full_len - 2;
+   size_t off = msg->offset + 2;
+   u8 opcode;
+
+   /* ensure skb->data points to the beginning of the openvpn packet */
+   if (!pskb_pull(skb, off)) {


Is that the one you mean in the previous comment?


Yes. The comment was probably placed there in a previous 
version/prototype and never moved.


I'll fix that.




+   net_warn_ratelimited("%s: packet too small for peer %u\n",
+netdev_name(peer->ovpn->dev), peer->id);
+   goto err;
+   }

[some checks]

+   /* DATA_V2 packets are handled in kernel, the rest goes to user space */
+   opcode = ovpn_opcode_from_skb(skb, 0);
+   if (unlikely(opcode != OVPN_DATA_V2)) {
+   if (opcode == OVPN_DATA_V1) {
+   net_warn_ratelimited("%s: DATA_V1 detected on the TCP 
stream\n",
+netdev_name(peer->ovpn->dev));
+   goto err;


In TCP encap, receiving OVPN_DATA_V1 packets is going to kill the peer:


+err:
+   dev_core_stats_rx_dropped_inc(peer->ovpn->dev);
+   kfree_skb(skb);
+   ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
+}
+


but that's not the case with the UDP encap (ovpn_udp_encap_recv simply
drops those packets). Should the TCP/UDP behavior be consistent?


Ideally a UDP DATA_V1 could be just a replayed/spoofed packet, so 
killing the peer doesn't sound good. It'd be a very easy attack.


Doing the same in TCP is much much harder (if practical at all) and in 
that case it'd be impossible to understand what's happening on the 
stream, so we just abort.


I think any TCP connection (without TCP-MD5) that gets messed up this 
way will abort.









+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+  struct sk_buff *skb)
+{
+   u16 len = skb->len;
+
+   *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
+
+   spin_lock_nested(&sock->sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);


With this, lockdep is still going to complain in the unlikely case
that ovpn-TCP traffic is carried over another ovpn-TCP socket, right?
(probably fine to leave it like that)


Yeah.
I am not sure how much complexity we'd need to workaround that.
I also assume it's fine to keep it this way (this is also what L2TP does).




[...]

+static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+   struct ovpn_socket *sock;
+   int ret, linear = PAGE_SIZE;
+   struct ovpn_peer *peer;
+   struct sk_buff *skb;
+
+   lock_sock(sk);
+   rcu_read_lock();
+   sock = rcu_dereference_sk_user_data(sk);
+   if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
+   rcu_read_unlock();
+   release_sock(sk);
+   return -EIO;
+   }
+   rcu_read_unlock();
+   peer = sock->peer;


This 

Re: [PATCH net-next v20 12/25] ovpn: implement TCP transport

2025-03-03 Thread Sabrina Dubroca
2025-02-27, 02:21:37 +0100, Antonio Quartulli wrote:
> @@ -94,11 +96,23 @@ void ovpn_socket_release(struct ovpn_peer *peer)
>* detached before it can be picked by a concurrent reader.
>*/
>   lock_sock(sock->sock->sk);
> - ovpn_socket_put(peer, sock);
> + released = ovpn_socket_put(peer, sock);
>   release_sock(sock->sock->sk);
>  
>   /* align all readers with sk_user_data being NULL */
>   synchronize_rcu();
> +
> + /* following cleanup should happen with lock released */
> + if (released) {
> + if (sock->sock->sk->sk_protocol == IPPROTO_UDP) {
> + netdev_put(sock->ovpn->dev, &sock->dev_tracker);
> + } else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) {
> + /* wait for TCP jobs to terminate */
> + ovpn_tcp_socket_wait_finish(sock);
> + ovpn_peer_put(sock->peer);
> + }
> + kfree_rcu(sock, rcu);

kfree_rcu after synchronize_rcu is a bit unexpected. Do we still need
to wait before we free sock, now that we have synchronize_rcu before?

> + }
>  }
>  



> +static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
> +{
> + struct strp_msg *rxm = strp_msg(skb);
> + __be16 blen;
> + u16 len;
> + int err;
> +
> + /* when packets are written to the TCP stream, they are prepended with
> +  * two bytes indicating the actual packet size.
> +  * Here we read those two bytes and move the skb data pointer to the
> +  * beginning of the packet

There's no update to skb->data being done in ovpn_tcp_parse AFAICT.

[...]
> +static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
> +{
> + struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
> + struct strp_msg *msg = strp_msg(skb);
> + size_t pkt_len = msg->full_len - 2;
> + size_t off = msg->offset + 2;
> + u8 opcode;
> +
> + /* ensure skb->data points to the beginning of the openvpn packet */
> + if (!pskb_pull(skb, off)) {

Is that the one you mean in the previous comment?

> + net_warn_ratelimited("%s: packet too small for peer %u\n",
> +  netdev_name(peer->ovpn->dev), peer->id);
> + goto err;
> + }
[some checks]
> + /* DATA_V2 packets are handled in kernel, the rest goes to user space */
> + opcode = ovpn_opcode_from_skb(skb, 0);
> + if (unlikely(opcode != OVPN_DATA_V2)) {
> + if (opcode == OVPN_DATA_V1) {
> + net_warn_ratelimited("%s: DATA_V1 detected on the TCP 
> stream\n",
> +  netdev_name(peer->ovpn->dev));
> + goto err;

In TCP encap, receiving OVPN_DATA_V1 packets is going to kill the peer:

> +err:
> + dev_core_stats_rx_dropped_inc(peer->ovpn->dev);
> + kfree_skb(skb);
> + ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
> +}
> +

but that's not the case with the UDP encap (ovpn_udp_encap_recv simply
drops those packets). Should the TCP/UDP behavior be consistent?




> +void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
> +struct sk_buff *skb)
> +{
> + u16 len = skb->len;
> +
> + *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
> +
> + spin_lock_nested(&sock->sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);

With this, lockdep is still going to complain in the unlikely case
that ovpn-TCP traffic is carried over another ovpn-TCP socket, right?
(probably fine to leave it like that)


[...]
> +static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
> +{
> + struct ovpn_socket *sock;
> + int ret, linear = PAGE_SIZE;
> + struct ovpn_peer *peer;
> + struct sk_buff *skb;
> +
> + lock_sock(sk);
> + rcu_read_lock();
> + sock = rcu_dereference_sk_user_data(sk);
> + if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
> + rcu_read_unlock();
> + release_sock(sk);
> + return -EIO;
> + }
> + rcu_read_unlock();
> + peer = sock->peer;

This used to be done under RCU in previous versions of the series. Why
is it after rcu_read_unlock now? (likely safe since we're under
lock_sock so detach can't happen)

> +
> + if (msg->msg_flags & ~MSG_DONTWAIT) {
> + ret = -EOPNOTSUPP;
> + goto peer_free;
> + }


-- 
Sabrina



Re: [PATCH net-next v20 12/25] ovpn: implement TCP transport

2025-03-02 Thread Antonio Quartulli

On 02/03/2025 19:59, Sabrina Dubroca wrote:

2025-02-27, 02:21:37 +0100, Antonio Quartulli wrote:

Moreover export tcp_release_cb by means of EXPORT_SYMBOL instead of
EXPORT_IPV6_MOD, so that other modules can use it, even if IPV6 is
not compiled in.


Is that really needed? You're saving tcp.sk_cb.prot, so you could just
call peer->tcp.sk_cb.prot->release_cb? (with a bit of care since it's
called after peer_put)

[I don't know what the maintainers' preference is wrt "re-exporting"
symbols that got moved to EXPORT_IPV6_MOD]


Mh yeah, I like your suggestion as I don't have to change the EXPORT and 
also I don't have to make assumptions on the original value of 
->release_cb as I can just call it.


Will follow your suggestion. Thanks!




[...]

+static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
+{
+   struct sk_buff *skb = peer->tcp.out_msg.skb;
+
+   if (!skb)
+   return;
+
+   if (peer->tcp.tx_in_progress)
+   return;
+
+   peer->tcp.tx_in_progress = true;
+
+   do {
+   int ret = skb_send_sock_locked(sk, skb,
+  peer->tcp.out_msg.offset,
+  peer->tcp.out_msg.len);
+   if (unlikely(ret < 0)) {
+   if (ret == -EAGAIN)
+   goto out;
+
+   net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
+netdev_name(peer->ovpn->dev),
+peer->id, ret);
+
+   /* in case of TCP error we can't recover the VPN
+* stream therefore we abort the connection
+*/
+   ovpn_peer_del(peer,
+ OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);


I don't think this works:

ovpn_peer_del -> unlock_ovpn -> ovpn_socket_release -> might_sleep

but we can get to ovpn_tcp_send_sock in a few contexts that are not
allowed to sleep:

ovpn_tcp_send_skb -> ovpn_tcp_send_sock_skb -> ovpn_tcp_send_sock
__sk_flush_backlog -> release_cb = ovpn_tcp_release -> ovpn_tcp_send_sock_skb
release_sock   -> release_cb = ovpn_tcp_release -> ovpn_tcp_send_sock_skb


(I checked all other paths leading to unlock_ovpn/ovpn_socket_release,
this is the only one I could find that is not allowed to sleep. So it
would likely be easier to push this peer_del (or even just the
handling of release_list) into some other sleepable context than
trying to reshuffle all the other paths)


ACK (and thanks for double checking all other paths!)
I will do some magic on this peer_del() in order to make it sleep 
without issues.


Thanks!

Regards,





--
Antonio Quartulli
OpenVPN Inc.




Re: [PATCH net-next v20 12/25] ovpn: implement TCP transport

2025-03-02 Thread Sabrina Dubroca
2025-02-27, 02:21:37 +0100, Antonio Quartulli wrote:
> Moreover export tcp_release_cb by means of EXPORT_SYMBOL instead of
> EXPORT_IPV6_MOD, so that other modules can use it, even if IPV6 is
> not compiled in.

Is that really needed? You're saving tcp.sk_cb.prot, so you could just
call peer->tcp.sk_cb.prot->release_cb? (with a bit of care since it's
called after peer_put)

[I don't know what the maintainers' preference is wrt "re-exporting"
symbols that got moved to EXPORT_IPV6_MOD]


[...]
> +static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
> +{
> + struct sk_buff *skb = peer->tcp.out_msg.skb;
> +
> + if (!skb)
> + return;
> +
> + if (peer->tcp.tx_in_progress)
> + return;
> +
> + peer->tcp.tx_in_progress = true;
> +
> + do {
> + int ret = skb_send_sock_locked(sk, skb,
> +peer->tcp.out_msg.offset,
> +peer->tcp.out_msg.len);
> + if (unlikely(ret < 0)) {
> + if (ret == -EAGAIN)
> + goto out;
> +
> + net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
> +  netdev_name(peer->ovpn->dev),
> +  peer->id, ret);
> +
> + /* in case of TCP error we can't recover the VPN
> +  * stream therefore we abort the connection
> +  */
> + ovpn_peer_del(peer,
> +   OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);

I don't think this works:

ovpn_peer_del -> unlock_ovpn -> ovpn_socket_release -> might_sleep

but we can get to ovpn_tcp_send_sock in a few contexts that are not
allowed to sleep:

ovpn_tcp_send_skb -> ovpn_tcp_send_sock_skb -> ovpn_tcp_send_sock
__sk_flush_backlog -> release_cb = ovpn_tcp_release -> ovpn_tcp_send_sock_skb
release_sock   -> release_cb = ovpn_tcp_release -> ovpn_tcp_send_sock_skb


(I checked all other paths leading to unlock_ovpn/ovpn_socket_release,
this is the only one I could find that is not allowed to sleep. So it
would likely be easier to push this peer_del (or even just the
handling of release_list) into some other sleepable context than
trying to reshuffle all the other paths)

-- 
Sabrina