When the BPF sk_msg verdict sets apply_bytes smaller than the current
open record, tls_push_record() splits ctx->open_rec into the record
being encrypted and a remainder record. The synchronous path reattaches
the remainder to ctx->open_rec before continuing.

If the selected AEAD provider completes asynchronously,
tls_do_encryption() returns -EINPROGRESS after unhooking ctx->open_rec.
tls_push_record() currently returns immediately in that case, before
the split remainder is reattached. The remainder is then no longer
reachable through ctx->open_rec or ctx->tx_list, so the data it holds
can be silently dropped instead of being transmitted, and the orphaned
tls_rec can be leaked.

Keep the split remainder rooted even when encryption of the first record
is pending asynchronously, and continue the BPF verdict drain loop after
an async record has been queued. If that loop then hits a later verdict
error, wait for the pending async encryption before returning the error
so zerocopy user pages cannot be released while cryptd still reads them.

Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling")
Cc: [email protected] # 4.20+
Signed-off-by: Christopher Lusk <[email protected]>
Assisted-by: Codex:gpt-5.5
Assisted-by: Claude:claude-opus-4-7
---
 net/tls/tls_sw.c | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 964ebc268..5b20be5b4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -840,16 +840,19 @@ static int tls_push_record(struct sock *sk, int flags,
        rc = tls_do_encryption(sk, tls_ctx, ctx, req,
                               msg_pl->sg.size + prot->tail_size, i);
        if (rc < 0) {
-               if (rc != -EINPROGRESS) {
-                       tls_err_abort(sk, -EBADMSG);
-                       if (split) {
-                               tls_ctx->pending_open_record_frags = true;
-                               tls_merge_open_record(sk, rec, tmp, orig_end);
-                       }
+               if (rc == -EINPROGRESS)
+                       goto split_done;
+
+               tls_err_abort(sk, -EBADMSG);
+               if (split) {
+                       tls_ctx->pending_open_record_frags = true;
+                       tls_merge_open_record(sk, rec, tmp, orig_end);
                }
                ctx->async_capable = 1;
                return rc;
-       } else if (split) {
+       }
+split_done:
+       if (split) {
                msg_pl = &tmp->msg_plaintext;
                msg_en = &tmp->msg_encrypted;
                sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size);
@@ -857,6 +860,11 @@ static int tls_push_record(struct sock *sk, int flags,
                ctx->open_rec = tmp;
        }
 
+       if (rc < 0) {
+               ctx->async_capable = 1;
+               return rc;
+       }
+
        return tls_tx_records(sk, flags);
 }
 
@@ -871,6 +879,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
        struct sock *sk_redir;
        struct tls_rec *rec;
        bool enospc, policy, redir_ingress;
+       bool async = false;
+       int async_err = 0;
        int err = 0, send;
        u32 delta = 0;
 
@@ -920,6 +930,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
        switch (psock->eval) {
        case __SK_PASS:
                err = tls_push_record(sk, flags, record_type);
+               if (err == -EINPROGRESS) {
+                       async = true;
+                       err = 0;
+               }
                if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
                        *copied -= sk_msg_free(sk, msg);
                        tls_free_open_rec(sk);
@@ -988,8 +1002,18 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
                        goto more_data;
        }
  out_err:
+       if (async && err && err != -EINPROGRESS) {
+               async_err = tls_encrypt_async_wait(ctx);
+               if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+                       /* tx_lock is held; the worker will reschedule if needed. */
+                       cancel_delayed_work(&ctx->tx_work.work);
+                       tls_tx_records(sk, flags);
+               }
+               if (async_err)
+                       err = async_err;
+       }
        sk_psock_put(sk, psock);
-       return err;
+       return err ?: (async ? -EINPROGRESS : 0);
 }
 
 static int tls_sw_push_pending_record(struct sock *sk, int flags)
-- 
2.54.0


Reply via email to