Investigation of the VZ US-QA cluster shows that reducing the congestion
window after idle periods makes the window open too slowly once
data starts to flow again.

So, introduce ssthresh (a slow-start threshold) to allow the window
to open faster after idle periods.

This may still not be enough, and the window may need to be opened
even more aggressively. Further observations will show.

Signed-off-by: Pavel Butsykin <pbutsy...@virtuozzo.com>
---
 fs/fuse/kio/pcs/pcs_cs.c  | 26 +++++++++++++++++++++-----
 fs/fuse/kio/pcs/pcs_cs.h  |  1 +
 fs/fuse/kio/pcs/pcs_map.c |  7 ++++++-
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c
index 00cd1ae99850..58ad6aea17b8 100644
--- a/fs/fuse/kio/pcs/pcs_cs.c
+++ b/fs/fuse/kio/pcs/pcs_cs.c
@@ -62,6 +62,7 @@ struct pcs_cs *pcs_cs_alloc(struct pcs_cs_set *css,
        cs->in_flight = 0;
        cs->cwnd = PCS_CS_INIT_CWND;
        cs->eff_cwnd = PCS_CS_INIT_CWND;
+       cs->ssthresh = PCS_CS_INIT_CWND;
        cs->cwr_state = 0;
        atomic_set(&cs->latency_avg, 0);
        cs->net_latency_avg = 0;
@@ -603,6 +604,10 @@ static void handle_congestion(struct pcs_cs *cs, struct 
pcs_rpc_hdr *h)
                 * to half of min(in_flight, cwnd) and enter congestion 
reduction state,
                 * where we ignore further congestion notifications until 
window is reduced
                 */
+               if (who->cwnd >= PCS_CS_INIT_CWND)
+                       who->ssthresh = who->cwnd;
+               else
+                       who->ssthresh = PCS_CS_INIT_CWND;
                if (who->in_flight < who->cwnd)
                        who->cwnd = who->in_flight;
                who->cwnd /= 2;
@@ -659,8 +664,12 @@ static void cs_keep_waiting(struct pcs_rpc *ep, struct 
pcs_msg *req, struct pcs_
                }
 
                if (!who->cwr_state) {
-                       FUSE_KDTRACE(cc_from_csset(cs->css)->fc, "Congestion 
window on CS" NODE_FMT " reducing %d/%d/%d", NODE_ARGS(h->xid.origin),
-                                    who->in_flight, who->eff_cwnd, who->cwnd);
+                       FUSE_KTRACE(cc_from_csset(cs->css)->fc, "Congestion 
window on CS" NODE_FMT " reducing %d/%d/%d", NODE_ARGS(h->xid.origin),
+                                   who->in_flight, who->eff_cwnd, who->cwnd);
+                       if (who->cwnd >= PCS_CS_INIT_CWND)
+                               who->ssthresh = who->cwnd;
+                       else
+                               who->ssthresh = PCS_CS_INIT_CWND;
                        if (who->in_flight < who->cwnd)
                                who->cwnd = who->in_flight;
                        who->cwnd /= 2;
@@ -899,9 +908,14 @@ unsigned int cs_get_avg_in_flight(struct pcs_cs *cs)
                                cs->in_flight_avg >>= interval;
                        }
                        if (cs->cwnd > PCS_CS_INIT_CWND) {
-                               cs->cwnd = PCS_CS_INIT_CWND;
-                               if (cs->eff_cwnd > PCS_CS_INIT_CWND)
-                                       cs->eff_cwnd = PCS_CS_INIT_CWND;
+                               unsigned int cwnd = PCS_CS_INIT_CWND;
+                               TRACE("Congestion window on CS#" NODE_FMT " was 
not used, shrink %u -> %u", NODE_ARGS(cs->id),
+                                       cs->cwnd, cwnd);
+                               if (cs->cwnd > cs->ssthresh)
+                                       cs->ssthresh = cs->cwnd;
+                               cs->cwnd = cwnd;
+                               if (cs->eff_cwnd > cwnd)
+                                       cs->eff_cwnd = cwnd;
                        }
                }
        }
@@ -962,6 +976,8 @@ void cs_cwnd_use_or_lose(struct pcs_cs *cs)
 
                        FUSE_KTRACE(cc_from_csset(cs->css)->fc, "Congestion 
window on CS#" NODE_FMT " was not used, shrink %u -> %u", NODE_ARGS(cs->id),
                                    cs->cwnd, cwnd);
+                       if (cs->cwnd > cs->ssthresh)
+                               cs->ssthresh = cs->cwnd;
                        cs->cwnd = cwnd;
                        if (cs->eff_cwnd > cwnd)
                                cs->eff_cwnd = cwnd;
diff --git a/fs/fuse/kio/pcs/pcs_cs.h b/fs/fuse/kio/pcs/pcs_cs.h
index 1fb40936d046..513d53539211 100644
--- a/fs/fuse/kio/pcs/pcs_cs.h
+++ b/fs/fuse/kio/pcs/pcs_cs.h
@@ -52,6 +52,7 @@ struct pcs_cs {
        unsigned int            in_flight;
        unsigned int            eff_cwnd;
        unsigned int            cwnd;
+       unsigned int            ssthresh;
        int                     cwr_state;
        atomic_t                latency_avg;
        unsigned int            net_latency_avg;
diff --git a/fs/fuse/kio/pcs/pcs_map.c b/fs/fuse/kio/pcs/pcs_map.c
index 0af852615ff4..9c3762c92315 100644
--- a/fs/fuse/kio/pcs/pcs_map.c
+++ b/fs/fuse/kio/pcs/pcs_map.c
@@ -1414,6 +1414,11 @@ static void pcs_cs_deaccount(struct pcs_int_request 
*ireq, struct pcs_cs * cs, i
                if (cs->last_latency > iolat_cutoff && ireq->type != 
PCS_IREQ_FLUSH) {
                        unsigned int clamp;
 
+                       if (cs->cwnd >= PCS_CS_INIT_CWND)
+                               cs->ssthresh = cs->cwnd;
+                       else
+                               cs->ssthresh = PCS_CS_INIT_CWND;
+
                        clamp = PCS_CS_INIT_CWND;
                        if (cs->last_latency > iolat_cutoff*8)
                                clamp = PCS_CS_INIT_CWND/8;
@@ -1429,7 +1434,7 @@ static void pcs_cs_deaccount(struct pcs_int_request 
*ireq, struct pcs_cs * cs, i
                } else if (cs->in_flight >= cs->cwnd && !cs->cwr_state && 
worth_to_grow(ireq, cs)) {
                        unsigned int cwnd;
 
-                       if (cs->cwnd < PCS_CS_INIT_CWND)
+                       if (cs->cwnd <= cs->ssthresh)
                                cwnd = cs->cwnd + cost;
                        else
                                cwnd = cs->cwnd + 0x100000000ULL/cs->cwnd;
-- 
2.15.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to