From 2025f4ee67f8c222e62ee7eb51a4106aca7117b0 Mon Sep 17 00:00:00 2001
From: Sergey Kandaurov <pluknet@nginx.com>
Date: Fri, 25 Apr 2025 23:32:24 +0400
Subject: [PATCH] QUIC: fixed sending acknowledgments with limited congestion
 window.

Previously, it was not possible to send acknowledgments if the
congestion window was limited or temporarily exceeded, such as
after sending a large response or MTU probe.

This is especially visible if ACKs from the peer, which are needed
to decrease the in-flight counter, are not received for some reason;
this may result in a stalled connection.

The fix is to teach ngx_quic_output() to send ACKs regardless
of the congestion window.  This complies with RFC 9002, Section 7:

: An endpoint MUST NOT send a packet if it would cause bytes_in_flight
: (see Appendix B.2) to be larger than the congestion window

And RFC 9002, Appendix B.2:

: bytes_in_flight:
:   Packets only containing ACK frames do not count toward
:   bytes_in_flight to ensure congestion control does not impede
:   congestion feedback.

Reported by Vladimir Homutov.
---
 src/event/quic/ngx_event_quic_output.c | 41 ++++++++++++++++++--------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/event/quic/ngx_event_quic_output.c b/src/event/quic/ngx_event_quic_output.c
index a92a539f3..f08624814 100644
--- a/src/event/quic/ngx_event_quic_output.c
+++ b/src/event/quic/ngx_event_quic_output.c
@@ -55,7 +55,8 @@ static ssize_t ngx_quic_send_segments(ngx_connection_t *c, u_char *buf,
     size_t len, struct sockaddr *sockaddr, socklen_t socklen, size_t segment);
 #endif
 static ssize_t ngx_quic_output_packet(ngx_connection_t *c,
-    ngx_quic_send_ctx_t *ctx, u_char *data, size_t max, size_t min);
+    ngx_quic_send_ctx_t *ctx, u_char *data, size_t max, size_t min,
+    ngx_uint_t ack_only);
 static void ngx_quic_init_packet(ngx_connection_t *c, ngx_quic_send_ctx_t *ctx,
     ngx_quic_header_t *pkt, ngx_quic_path_t *path);
 static ngx_uint_t ngx_quic_get_padding_level(ngx_connection_t *c);
@@ -131,8 +132,7 @@ ngx_quic_create_datagrams(ngx_connection_t *c)
     ngx_memzero(preserved_pnum, sizeof(preserved_pnum));
 #endif
 
-    while (cg->in_flight < cg->window) {
-
+    do {
         p = dst;
 
         len = ngx_quic_path_limit(c, path, path->mtu);
@@ -158,7 +158,8 @@ ngx_quic_create_datagrams(ngx_connection_t *c)
                 return NGX_OK;
             }
 
-            n = ngx_quic_output_packet(c, ctx, p, len, min);
+            n = ngx_quic_output_packet(c, ctx, p, len, min,
+                                       cg->in_flight >= cg->window);
             if (n == NGX_ERROR) {
                 return NGX_ERROR;
             }
@@ -187,7 +188,8 @@ ngx_quic_create_datagrams(ngx_connection_t *c)
         ngx_quic_commit_send(c);
 
         path->sent += len;
-    }
+
+    } while (cg->in_flight < cg->window);
 
     return NGX_OK;
 }
@@ -315,6 +317,10 @@ ngx_quic_allow_segmentation(ngx_connection_t *c)
 
         bytes += f->len;
 
+        if (qc->congestion.in_flight + bytes >= qc->congestion.window) {
+            return 0;
+        }
+
         if (bytes > len * 3) {
             /* require at least ~3 full packets to batch */
             return 1;
@@ -364,7 +370,7 @@ ngx_quic_create_segments(ngx_connection_t *c)
 
         if (len && cg->in_flight + (p - dst) < cg->window) {
 
-            n = ngx_quic_output_packet(c, ctx, p, len, len);
+            n = ngx_quic_output_packet(c, ctx, p, len, len, 0);
             if (n == NGX_ERROR) {
                 return NGX_ERROR;
             }
@@ -521,7 +527,7 @@ ngx_quic_get_padding_level(ngx_connection_t *c)
 
 static ssize_t
 ngx_quic_output_packet(ngx_connection_t *c, ngx_quic_send_ctx_t *ctx,
-    u_char *data, size_t max, size_t min)
+    u_char *data, size_t max, size_t min, ngx_uint_t ack_only)
 {
     size_t                  len, pad, min_payload, max_payload;
     u_char                 *p;
@@ -585,6 +591,10 @@ ngx_quic_output_packet(ngx_connection_t *c, ngx_quic_send_ctx_t *ctx,
     {
         f = ngx_queue_data(q, ngx_quic_frame_t, queue);
 
+        if (ack_only && f->type != NGX_QUIC_FT_ACK) {
+            continue;
+        }
+
         if (len >= max_payload) {
             break;
         }
@@ -644,21 +654,26 @@ ngx_quic_output_packet(ngx_connection_t *c, ngx_quic_send_ctx_t *ctx,
 
     ctx->pnum++;
 
+    q = ngx_queue_head(&ctx->frames);
+
     if (pkt.need_ack) {
-        q = ngx_queue_head(&ctx->frames);
         f = ngx_queue_data(q, ngx_quic_frame_t, queue);
-
         f->plen = res.len;
     }
 
-    while (nframes--) {
-        q = ngx_queue_head(&ctx->frames);
+    while (nframes-- && q != ngx_queue_sentinel(&ctx->frames)) {
+
         f = ngx_queue_data(q, ngx_quic_frame_t, queue);
+        q = ngx_queue_next(q);
+
+        if (ack_only && f->type != NGX_QUIC_FT_ACK) {
+            continue;
+        }
 
         f->pkt_need_ack = pkt.need_ack;
 
-        ngx_queue_remove(q);
-        ngx_queue_insert_tail(&ctx->sending, q);
+        ngx_queue_remove(&f->queue);
+        ngx_queue_insert_tail(&ctx->sending, &f->queue);
     }
 
     return res.len;
-- 
2.39.5 (Apple Git-154)

