When tls records are decrypted using asynchronous acclerators such as
NXP CAAM engine, the crypto apis return -EINPROGRESS. Presently, on
getting -EINPROGRESS, the tls record processing stops till the time the
crypto accelerator finishes off and returns the result. This incurs a
context switch and is not an efficient way of accessing the crypto
accelerators. Crypto accelerators work efficient when they are queued
with multiple crypto jobs without having to wait for the previous ones
to complete.
The patch submits multiple crypto requests without having to wait for
for previous ones to complete. This has been implemented for records
which are decrypted in zero-copy mode. At the end of recvmsg(), we wait
for all the asynchronous decryption requests to complete.
The references to records which have been sent for async decryption are
dropped. For cases where record decryption is not possible in zero-copy
mode, asynchronous decryption is not used and we wait for decryption
crypto api to complete.
For crypto requests executing in async fashion, the memory for
aead_request, sglists and skb etc is freed from the decryption
completion handler. The decryption completion handler wakesup the
sleeping user context when recvmsg() flags that it has done sending
all the decryption requests and there are no more decryption requests
pending to be completed.
Signed-off-by: Vakul Garg
---
Changes since RFC version:
1) Improved commit message.
2) Fixed dequeued record offset handling because of which few of
tls selftests 'recv_partial, recv_peek, recv_peek_multiple' were
failing.
With this patch version, all tls selftests pass with both software based
synchronous crypto and hardware based asynchronous crypto.
Also, I tested sanity of file contents transferred over a tls session
backed by kernel based record layer.
include/net/tls.h | 6 +++
net/tls/tls_sw.c | 124 ++
2 files changed, 122 insertions(+), 8 deletions(-)
diff --git a/include/net/tls.h b/include/net/tls.h
index d5c683e8bb22..cd0a65bd92f9 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -124,6 +124,12 @@ struct tls_sw_context_rx {
struct sk_buff *recv_pkt;
u8 control;
bool decrypted;
+ atomic_t decrypt_pending;
+ bool async_notify;
+};
+
+struct decrypt_req_ctx {
+ struct sock *sk;
};
struct tls_record_info {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 52fbe727d7c1..5d1e7c336807 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -43,12 +43,50 @@
#define MAX_IV_SIZETLS_CIPHER_AES_GCM_128_IV_SIZE
+static void tls_decrypt_done(struct crypto_async_request *req, int err)
+{
+ struct aead_request *aead_req = (struct aead_request *)req;
+ struct decrypt_req_ctx *req_ctx =
+ (struct decrypt_req_ctx *)(aead_req + 1);
+
+ struct scatterlist *sgout = aead_req->dst;
+
+ struct tls_context *tls_ctx = tls_get_ctx(req_ctx->sk);
+ struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+ int pending = atomic_dec_return(>decrypt_pending);
+ struct scatterlist *sg;
+ unsigned int pages;
+
+ /* Propagate if there was an err */
+ if (err) {
+ ctx->async_wait.err = err;
+ tls_err_abort(req_ctx->sk, err);
+ }
+
+ /* Release the skb, pages and memory allocated for crypto req */
+ kfree_skb(req->data);
+
+ /* Skip the first S/G entry as it points to AAD */
+ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
+ if (!sg)
+ break;
+ put_page(sg_page(sg));
+ }
+
+ kfree(aead_req);
+
+ if (!pending && READ_ONCE(ctx->async_notify))
+ complete(>async_wait.completion);
+}
+
static int tls_do_decryption(struct sock *sk,
+struct sk_buff *skb,
struct scatterlist *sgin,
struct scatterlist *sgout,
char *iv_recv,
size_t data_len,
-struct aead_request *aead_req)
+struct aead_request *aead_req,
+bool async)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -59,10 +97,34 @@ static int tls_do_decryption(struct sock *sk,
aead_request_set_crypt(aead_req, sgin, sgout,
data_len + tls_ctx->rx.tag_size,
(u8 *)iv_recv);
- aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, >async_wait);
- ret = crypto_wait_req(crypto_aead_decrypt(aead_req), >async_wait);
+ if (async) {
+ struct decrypt_req_ctx *req_ctx;
+
+ req_ctx = (struct decrypt_req_ctx