On Wed, Jun 17, 2020 at 02:45:12PM -0600, Jason A. Donenfeld wrote:
> Looks like my explanation there wasn't 100% accurate, but it does seem
> like the issue occurs when gcc sees a clear tail call that it can
> optimize into a B instruction instead of a BL instruction.
> 
> The below patch avoids that, and thus fixes your issue, using a pretty
> bad trick that's not really suitable for being committed anywhere, but
> it is perhaps leading us in the right direction:
> 
> diff --git a/src/send.c b/src/send.c
> index 828b086a..4bb6911f 100644
> --- a/src/send.c
> +++ b/src/send.c
> @@ -221,6 +221,8 @@ static bool encrypt_packet(struct sk_buff *skb, struct 
> noise_keypair *keypair,
>      simd_context);
>  }
>  
> +volatile char dummy;
> +
>  void wg_packet_send_keepalive(struct wg_peer *peer)
>  {
>   struct sk_buff *skb;
> @@ -240,6 +242,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer)
>   }
>  
>   wg_packet_send_staged_packets(peer);
> + dummy = -1;
>  }
>  
>  static void wg_packet_create_data_done(struct sk_buff *first,

A better fix with more explanation: it looks like the issue doesn't have
to do with the multifile thing I pointed out before, but just that gcc
sees it can optimize the tail call into a B instruction, which seems to
have a ±2KB range, whereas BL has a ±4MB range. The solution is to just
move the location of the function in that file to be closer to the
destination of the tail call. I'm not a big fan of that and I'm slightly
worried davem will nack it because it makes backporting harder for a
fairly speculative gain (at least, I haven't yet taken measurements,
though I suppose I could). There's also the question of - why are we
doing goofy reordering things to the code to work around a toolchain
bug? Shouldn't we fix the toolchain? So, I'll keep thinking...

diff --git a/src/send.c b/src/send.c
index 828b086a..f44aff8d 100644
--- a/src/send.c
+++ b/src/send.c
@@ -221,27 +221,6 @@ static bool encrypt_packet(struct sk_buff *skb, struct 
noise_keypair *keypair,
                                                   simd_context);
 }

-void wg_packet_send_keepalive(struct wg_peer *peer)
-{
-       struct sk_buff *skb;
-
-       if (skb_queue_empty(&peer->staged_packet_queue)) {
-               skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
-                               GFP_ATOMIC);
-               if (unlikely(!skb))
-                       return;
-               skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
-               skb->dev = peer->device->dev;
-               PACKET_CB(skb)->mtu = skb->dev->mtu;
-               skb_queue_tail(&peer->staged_packet_queue, skb);
-               net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu 
(%pISpfsc)\n",
-                                   peer->device->dev->name, peer->internal_id,
-                                   &peer->endpoint.addr);
-       }
-
-       wg_packet_send_staged_packets(peer);
-}
-
 static void wg_packet_create_data_done(struct sk_buff *first,
                                       struct wg_peer *peer)
 {
@@ -346,6 +325,27 @@ err:
        kfree_skb_list(first);
 }

+void wg_packet_send_keepalive(struct wg_peer *peer)
+{
+       struct sk_buff *skb;
+
+       if (skb_queue_empty(&peer->staged_packet_queue)) {
+               skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
+                               GFP_ATOMIC);
+               if (unlikely(!skb))
+                       return;
+               skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
+               skb->dev = peer->device->dev;
+               PACKET_CB(skb)->mtu = skb->dev->mtu;
+               skb_queue_tail(&peer->staged_packet_queue, skb);
+               net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu 
(%pISpfsc)\n",
+                                   peer->device->dev->name, peer->internal_id,
+                                   &peer->endpoint.addr);
+       }
+
+       wg_packet_send_staged_packets(peer);
+}
+
 void wg_packet_purge_staged_packets(struct wg_peer *peer)
 {
        spin_lock_bh(&peer->staged_packet_queue.lock);

Reply via email to