Patch series for New Balancing algorithm (Peak) EWMA

Aleksandar Lazic Thu, 05 Feb 2026 16:03:56 -0800

Hi.

Here is my first attempt at adding the (Peak) EWMA load balancing algorithm to HAProxy.


Regards
Aleks

From 5ce148a3621d9e97d3118e582556c763dbc2667d Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:59:46 +0100
Subject: [PATCH 8/8] MEDIUM: backend: add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 src/lb_pewma.c | 938 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 938 insertions(+)
 create mode 100644 src/lb_pewma.c

diff --git a/src/lb_pewma.c b/src/lb_pewma.c
new file mode 100644
index 0000000000..69ac79fbe9
--- /dev/null
+++ b/src/lb_pewma.c
@@ -0,0 +1,938 @@
+/*
+ * Peak EWMA load balancing algorithm.
+ *
+ * This algorithm selects the server with the lowest estimated load
+ * computed as: latency * (inflight + 1) / weight, where latency is
+ * the EWMA of total response time (t_time). It combines response time
+ * awareness with connection counting, preferring fast servers over slow
+ * ones. The tree structure and concurrency logic is derived from the
+ * Fast Weighted Least Connection (FWLC) algorithm.
+ *
+ * Copyright 2026 Aleksandar Lazic <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+/* Tree element declared under the fwlc_tree_elt tag, which server-t.h only
+ * references through pointers; it is fully defined here to access its fields.
+ * NOTE(review): same layout as FWLC only if PEWMA_LISTS_NB matches - confirm.
+ */
+struct fwlc_tree_elt {
+	struct mt_list srv_list[PEWMA_LISTS_NB];	/* servers sharing this key, spread over several lists */
+	struct mt_list free_list;			/* link in the proxy's lbprm.lb_free_list */
+	struct eb32_node lb_node;			/* node in the server tree, indexed by key */
+	unsigned int elements;				/* number of servers attached to this element */
+};
+
+DECLARE_STATIC_TYPED_POOL(pool_head_pewma_elt, "pewma_tree_elt", struct fwlc_tree_elt);
+
+#define PEWMA_LBPRM_SEQ(lbprm)		((lbprm) & 0xffffffff)	/* low 32 bits of lb_seq: sequence number */
+#define PEWMA_LBPRM_SMALLEST(lbprm)	((lbprm) >> 32)		/* high 32 bits of lb_seq: smallest key in use */
+
+/*
+ * Atomically set lb_seq to <seq> (low 32 bits) plus <smallest>, the smallest key for which there is at
+ * least one server (high 32 bits), if it still equals <current>. Returns 1 on success, and 0 on failure.
+ */
+static int pewma_set_seq_and_smallest(struct lbprm *lbprm, uint64_t current, unsigned int seq, unsigned int smallest)
+{
+	uint64_t dst_nb = seq | ((uint64_t)smallest << 32);
+	int ret;
+#if defined(HA_CAS_IS_8B)
+	ret =  _HA_ATOMIC_CAS(&lbprm->lb_seq, &current, dst_nb);
+#elif defined(HA_HAVE_CAS_DW)
+	ret = _HA_ATOMIC_DWCAS(&lbprm->lb_seq, &current, &dst_nb);
+#else /* neither CAS flavour is available: compare and set under a spinlock */
+	__decl_thread(static HA_SPINLOCK_T seq_lock);
+
+	HA_SPIN_LOCK(OTHER_LOCK, &seq_lock);
+	if (lbprm->lb_seq == current) {
+		lbprm->lb_seq = dst_nb;
+		ret = 1;
+	} else
+		ret = 0;
+	HA_SPIN_UNLOCK(OTHER_LOCK, &seq_lock);
+#endif
+	return ret;
+
+}
+
+/* Detach server <s> from its tree by clearing s->lb_tree. The server must
+ * have previously been dequeued. This function is meant to be called when
+ * a server is going down or has its weight disabled.
+ *
+ * The server's lock and the lbprm's lock must be held.
+ */
+static inline void pewma_remove_from_tree(struct server *s)
+{
+	s->lb_tree = NULL;
+}
+
+/* Release every tree element still linked in proxy <p>'s lb_free_list
+ * back to the pool. */
+static void pewma_proxy_deinit(struct proxy *p)
+{
+	struct fwlc_tree_elt *elt = MT_LIST_POP(&p->lbprm.lb_free_list, struct fwlc_tree_elt *, free_list);
+
+	while (elt) {
+		pool_free(pool_head_pewma_elt, elt);
+		elt = MT_LIST_POP(&p->lbprm.lb_free_list, struct fwlc_tree_elt *, free_list);
+	}
+}
+
+/* Release the spare tree element cached in server <s>, if any. */
+static void pewma_server_deinit(struct server *s)
+{
+	struct fwlc_tree_elt *spare = s->free_elt;
+
+	if (!spare)
+		return;
+	pool_free(pool_head_pewma_elt, spare);
+	s->free_elt = NULL;
+}
+
+/* Unlink server <s> from its list and from its tree element. If it was the
+ * last server attached to that element, the node is removed from the tree.
+ * The lbprm's lock must be held.
+ */
+static inline void pewma_dequeue_srv(struct server *s)
+{
+	struct fwlc_tree_elt *tree_elt = s->tree_elt;
+	unsigned int elts;
+
+	MT_LIST_DELETE(&s->lb_mt_list);
+	if (tree_elt) {
+		elts = _HA_ATOMIC_FETCH_SUB(&tree_elt->elements, 1);
+		/* We are the last element, we can nuke the node */
+		if (elts == 1) {
+			if (PEWMA_LBPRM_SMALLEST(s->proxy->lbprm.lb_seq) == tree_elt->lb_node.key) {
+				/*
+				 * We were the smallest one, and now we're gone:
+				 * reset it to 0 and bump the sequence number
+				 */
+				/*
+				 * We're holding the lbprm lock so this should never fail,
+				 * as nobody should be around to modify it
+				 */
+				do {
+				} while (pewma_set_seq_and_smallest(&s->proxy->lbprm, s->proxy->lbprm.lb_seq, PEWMA_LBPRM_SEQ(s->proxy->lbprm.lb_seq) + 1, 0) == 0 && __ha_cpu_relax());
+
+			}
+			eb32_delete(&tree_elt->lb_node);
+		}
+	}
+	s->tree_elt = NULL;
+	if (s->free_elt) { /* also release the spare element cached on the server */
+		pool_free(pool_head_pewma_elt, s->free_elt);
+		s->free_elt = NULL;
+	}
+}
+
+/*
+ * Get a tree element for proxy <p>: recycle an empty one from the free list once
+ * it holds enough entries, otherwise use <allocated_elt> or allocate from the pool.
+ * Returns NULL on allocation failure. Must be called with the wrlock
+ */
+static struct fwlc_tree_elt *pewma_alloc_tree_elt(struct proxy *p, struct fwlc_tree_elt *allocated_elt)
+{
+	struct fwlc_tree_elt *tree_elt = NULL;
+	int i = 0;
+
+	if (p->lbprm.lb_free_list_nb >= PEWMA_MIN_FREE_ENTRIES) {
+		while ((tree_elt = MT_LIST_POP(&p->lbprm.lb_free_list, struct fwlc_tree_elt *, free_list)) != NULL) {
+			MT_LIST_APPEND(&p->lbprm.lb_free_list, &tree_elt->free_list); /* rotate: put it back at the tail */
+			if (tree_elt->elements == 0) {
+				eb32_delete(&tree_elt->lb_node);
+				if (i == 0) {
+					struct fwlc_tree_elt *tmptree;
+
+					tmptree = MT_LIST_POP(&p->lbprm.lb_free_list, struct fwlc_tree_elt *, free_list);
+					/*
+					 * Check if the next element still contains servers, and if not,
+					 * just free it, to do some cleanup.
+					 */
+					if (tmptree && tmptree->elements == 0) {
+						eb32_delete(&tmptree->lb_node);
+						pool_free(pool_head_pewma_elt, tmptree);
+						p->lbprm.lb_free_list_nb--;
+					} else if (tmptree)
+						MT_LIST_APPEND(&p->lbprm.lb_free_list, &tmptree->free_list);
+				}
+				return tree_elt; /* recycled element; it stays linked in the free list */
+		}
+			i++;
+			if (i > 3) /* only inspect a few entries before giving up on recycling */
+				break;
+		}
+	}
+	if (!allocated_elt) {
+		tree_elt = pool_alloc(pool_head_pewma_elt);
+		if (!tree_elt)
+			return NULL;
+	} else
+		tree_elt = allocated_elt;
+
+	for (i = 0; i < PEWMA_LISTS_NB; i++) {
+		MT_LIST_INIT(&tree_elt->srv_list[i]);
+	}
+	MT_LIST_INIT(&tree_elt->free_list);
+	MT_LIST_APPEND(&p->lbprm.lb_free_list, &tree_elt->free_list);
+	p->lbprm.lb_free_list_nb++;
+	tree_elt->elements = 0;
+	return tree_elt;
+}
+
+/*
+ * Look up the tree element matching <key> in server <s>'s tree, creating and
+ * inserting a new one when none exists. Returns NULL if the allocation fails.
+ * Must be called with the lbprm lock held.
+ */
+static struct fwlc_tree_elt *pewma_get_tree_elt(struct server *s, u32 key)
+{
+	struct eb32_node *found = eb32_lookup(s->lb_tree, key);
+	struct fwlc_tree_elt *elt;
+
+	/* an element already exists for this key, use it */
+	if (found)
+		return container_of(found, struct fwlc_tree_elt, lb_node);
+
+	/* none yet, a new element has to be set up and inserted */
+	elt = pewma_alloc_tree_elt(s->proxy, NULL);
+	if (elt) {
+		elt->lb_node.key = key;
+		eb32_insert(s->lb_tree, &elt->lb_node);
+	}
+	return elt;
+}
+
+/* Queue a server in its associated tree, assuming the <eweight> is >0.
+ * Servers are sorted by latency * (#conns+1) / weight. To ensure maximum
+ * accuracy, we use latency * (#conns+1) * SRV_EWGHT_MAX / eweight as the
+ * sorting key. The latency is the EWMA of total response time (t_time).
+ * When no latency data is available (cold start), latency defaults to 1,
+ * degrading to FWLC behavior.
+ *
+ * NOTE: Depending on the calling context, we use s->next_eweight or
+ *       s->cur_eweight. The next value is used when the server state is updated
+ *       (because the weight changed for instance). During this step, the server
+ *       state is not yet committed. The current value is used to reposition the
+ *       server in the tree. This happens when the server is used.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void pewma_queue_srv(struct server *s, unsigned int eweight)
+{
+	struct fwlc_tree_elt *tree_elt;
+	unsigned int inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queueslength);
+	unsigned int list_nb;
+	unsigned int latency;
+	u32 key;
+
+	if (inflight) {
+		unsigned long long raw;
+
+		latency = swrate_avg(_HA_ATOMIC_LOAD(&s->counters.t_time), TIME_STATS_SAMPLES);
+		if (!latency)
+			latency = 1;
+		raw = (unsigned long long)latency * (inflight + 1) * SRV_EWGHT_MAX / eweight;
+		key = (raw > (unsigned long long)UINT32_MAX) ? UINT32_MAX : (u32)raw;
+	} else {
+		key = 0;
+	}
+
+	tree_elt = pewma_get_tree_elt(s, key);
+	if (tree_elt == NULL) {
+		/* We failed to allocate memory for the tree_elt, just stop
+		 * now and schedule the requeue tasklet which will take care
+		 * of the queueing later.
+		 * If the tasklet doesn't exist yet, then there is nothing to
+		 * do, as it will be eventually scheduled after being created.
+		 */
+		if (s->requeue_tasklet)
+			tasklet_wakeup(s->requeue_tasklet);
+		return;
+	}
+	list_nb = statistical_prng_range(PEWMA_LISTS_NB);
+	MT_LIST_APPEND(&tree_elt->srv_list[list_nb], &s->lb_mt_list);
+	s->tree_elt = tree_elt;
+	_HA_ATOMIC_INC(&tree_elt->elements);
+	if (PEWMA_LBPRM_SMALLEST(s->proxy->lbprm.lb_seq) > key) {
+		/*
+		 * We're holding the lbprm lock so this should never fail,
+		 * as nobody should be around to modify it
+		 */
+		do {
+		} while (pewma_set_seq_and_smallest(&s->proxy->lbprm, s->proxy->lbprm.lb_seq, PEWMA_LBPRM_SEQ(s->proxy->lbprm.lb_seq) + 1, key) == 0);
+	}
+}
+
+/*
+ * Starting from a random list of <tree_elt>, loop until one can be locked, and return it.
+ */
+static __inline struct mt_list pewma_lock_target_list(struct fwlc_tree_elt *tree_elt)
+{
+	struct mt_list list = {NULL, NULL};
+	int i;
+	int dst_list;
+
+
+	dst_list = statistical_prng_range(PEWMA_LISTS_NB);
+
+	while (list.next == NULL) { /* a NULL <next> means no list could be locked in this round */
+		for (i = 0; i < PEWMA_LISTS_NB; i++) {
+			list = mt_list_try_lock_prev(&tree_elt->srv_list[(dst_list + i) % PEWMA_LISTS_NB]);
+			if (list.next != NULL)
+				break;
+		}
+	}
+	return list;
+}
+
+/*
+ * Compute the tree key of server <s> from its current state:
+ *   key = latency * (inflight + 1) * SRV_EWGHT_MAX / eweight, 0 when idle.
+ */
+static inline unsigned int pewma_get_key(struct server *s)
+{
+	unsigned int conns = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queueslength);
+	unsigned int weight = _HA_ATOMIC_LOAD(&s->cur_eweight);
+	unsigned int avg_time;
+	unsigned long long scaled;
+
+	/* The average total response time is rebuilt by swrate_avg() from
+	 * the t_time sliding sum. As long as no sample exists (cold start),
+	 * it is forced to 1, which degrades to pure FWLC behavior.
+	 */
+	avg_time = swrate_avg(_HA_ATOMIC_LOAD(&s->counters.t_time), TIME_STATS_SAMPLES);
+	if (avg_time == 0)
+		avg_time = 1;
+
+	/* a server without any inflight request always gets key 0 */
+	if (conns == 0)
+		return 0;
+
+	/* computed on 64 bits then saturated to 32; a null weight divides as 1 */
+	scaled = (unsigned long long)avg_time * (conns + 1) * SRV_EWGHT_MAX / (weight ? weight : 1);
+	if (scaled > (unsigned long long)UINT32_MAX)
+		return UINT32_MAX;
+	return (unsigned int)scaled;
+}
+
+/*
+ * Since lb_lock lets a single thread move a server at a time, the key may
+ * already be outdated once the move is finished. This is to be called after
+ * the lock was dropped: it recomputes the key and wakes the requeue tasklet
+ * up when it differs from <expected>.
+ */
+static inline void pewma_check_srv_key(struct server *s, unsigned int expected)
+{
+	if (pewma_get_key(s) == expected)
+		return;
+	if (s->requeue_tasklet)
+		tasklet_wakeup(s->requeue_tasklet);
+}
+
+/* Re-position the server in the Peak EWMA tree after it has been assigned one
+ * connection or after it has released one. Note that it is possible that
+ * the server has been moved out of the tree due to failed health-checks.
+ * The lbprm's lock will be used, in read mode then upgraded when needed.
+ */
+static void pewma_srv_reposition(struct server *s)
+{
+	struct mt_list to_unlock;
+	struct fwlc_tree_elt *tree_elt = NULL, *allocated_elt = NULL;
+	struct eb32_node *node;
+	struct mt_list list;
+	uint64_t cur_seq = 0;
+	unsigned int eweight = _HA_ATOMIC_LOAD(&s->cur_eweight);
+	unsigned int new_key;
+	unsigned int smallest;
+	int srv_lock;
+
+	HA_RWLOCK_RDLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+	new_key = pewma_get_key(s);
+	/* some calls will be made for no change (e.g connect_server() after
+	 * assign_server(). Let's check that first.
+	 */
+	if ((s->tree_elt && s->tree_elt->lb_node.node.leaf_p && eweight &&
+	    s->tree_elt->lb_node.key == new_key) || !s->lb_tree) {
+		HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+		return;
+	}
+
+	srv_lock = HA_ATOMIC_XCHG(&s->lb_lock, 1);
+	/* Somebody else is updating that server, give up */
+	if (srv_lock == 1) {
+		HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+		return;
+	}
+
+	node = eb32_lookup(s->lb_tree, new_key);
+	if (node)
+		tree_elt = container_of(node, struct fwlc_tree_elt, lb_node);
+	/*
+	 * It is possible that s->tree_elt was changed since we checked
+	 * As s->tree_elt is only changed while holding s->lb_lock,
+	 * check again now that we acquired it, and if we're using
+	 * the right element, do nothing.
+	 */
+	if (s->tree_elt && tree_elt == s->tree_elt) {
+		HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+		_HA_ATOMIC_STORE(&s->lb_lock, 0);
+		pewma_check_srv_key(s, new_key);
+		return;
+	}
+	/*
+	 * We have to allocate a new tree element, and/or remove the
+	 * previous element, we will modify the tree, so let's get the write
+	 * lock.
+	 */
+	if (!tree_elt) {
+		unsigned int new_new_key;
+
+		/*
+		 * We don't want to allocate something while holding the lock,
+		 * so make sure we have something allocated before.
+		 */
+		if (s->free_elt != NULL) {
+			allocated_elt = s->free_elt;
+			s->free_elt = NULL;
+		} else
+			allocated_elt = pool_alloc(pool_head_pewma_elt);
+		if (HA_RWLOCK_TRYRDTOWR(LBPRM_LOCK, &s->proxy->lbprm.lock) != 0) {
+			/* there's already some contention on the tree's lock, there's
+			 * no point insisting. Better wake up the server's tasklet that
+			 * will let this or another thread retry later. For the time
+			 * being, the server's apparent load is slightly inaccurate but
+			 * we don't care, if there is contention, it will self-regulate.
+			 */
+			if (s->requeue_tasklet)
+				tasklet_wakeup(s->requeue_tasklet);
+			HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+			s->free_elt = allocated_elt;
+			_HA_ATOMIC_STORE(&s->lb_lock, 0);
+			return;
+		}
+
+		/* we might have been waiting for a while on the lock above
+		 * so it's worth testing again because other threads are very
+		 * likely to have released a connection or taken one leading
+		 * to our target value (50% of the case in measurements).
+		 */
+
+		new_new_key = pewma_get_key(s);
+		if (new_new_key != new_key) {
+			if (s->tree_elt &&
+			    s->tree_elt->lb_node.node.leaf_p &&
+			    eweight && s->tree_elt->lb_node.key == new_new_key) {
+				/* Okay after all we have nothing to do */
+				HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+				s->free_elt = allocated_elt;
+				_HA_ATOMIC_STORE(&s->lb_lock, 0);
+				pewma_check_srv_key(s, new_new_key);
+				return;
+			}
+			node = eb32_lookup(s->lb_tree, new_new_key);
+			if (node) {
+				tree_elt = container_of(node, struct fwlc_tree_elt, lb_node);
+				HA_RWLOCK_WRTORD(LBPRM_LOCK, &s->proxy->lbprm.lock);
+				s->free_elt = allocated_elt;
+				allocated_elt = NULL;
+			} else
+				tree_elt = NULL;
+			new_key = new_new_key;
+		}
+	}
+
+	/*
+	 * Now we increment the number of elements in the new tree_elt,
+	 * we change our sequence number and smallest, and we then
+	 * decrement the number of elements in the old tree_elt.
+	 * It is important to keep this sequencing, as pewma_get_next_server()
+	 * uses the number of elements to know if there is something to look for,
+	 * and we want to make sure we do not miss a server.
+	 */
+	if (!tree_elt) {
+		/*
+		 * There were no tree element matching our key,
+		 * allocate one and insert it into the tree
+		 */
+		tree_elt = pewma_alloc_tree_elt(s->proxy, allocated_elt);
+		if (tree_elt == NULL) {
+			/* Allocation failed while holding the write lock, retry later */
+			HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+			_HA_ATOMIC_STORE(&s->lb_lock, 0);
+			if (s->requeue_tasklet)
+				tasklet_wakeup(s->requeue_tasklet);
+			return;
+		}
+		if (tree_elt == allocated_elt)
+			allocated_elt = NULL;
+		tree_elt->lb_node.key = new_key;
+		tree_elt->elements = 1;
+		__ha_barrier_store();
+		/* If we allocated, then we hold the write lock */
+		eb32_insert(s->lb_tree, &tree_elt->lb_node);
+		HA_RWLOCK_WRTORD(LBPRM_LOCK, &s->proxy->lbprm.lock);
+	} else {
+		_HA_ATOMIC_INC(&tree_elt->elements);
+	}
+
+	__ha_barrier_store();
+	/*
+	 * Update the sequence number, and the smallest if needed.
+	 * We always have to do it, even if we're not actually
+	 * updating the smallest one, otherwise we'll get an
+	 * ABA problem and a server may be missed when looked up.
+	 * The only time we don't have to do it is if another thread
+	 * increased it, and the new smallest element is not
+	 * higher than our new key.
+	 */
+	do {
+		unsigned int tmpsmallest;
+		uint64_t newcurseq = _HA_ATOMIC_LOAD(&s->proxy->lbprm.lb_seq);
+
+		if (cur_seq != 0 && PEWMA_LBPRM_SEQ(newcurseq) >
+		   PEWMA_LBPRM_SEQ(cur_seq) && new_key >= PEWMA_LBPRM_SMALLEST(newcurseq))
+			break;
+
+		cur_seq = newcurseq;
+		tmpsmallest = PEWMA_LBPRM_SMALLEST(cur_seq);
+		if (new_key > tmpsmallest)
+			smallest = tmpsmallest;
+		else
+			smallest = new_key;
+
+	} while (pewma_set_seq_and_smallest(&s->proxy->lbprm, cur_seq, PEWMA_LBPRM_SEQ(cur_seq) + 1, smallest) == 0 && __ha_cpu_relax());
+
+	__ha_barrier_store();
+
+	if (likely(s->tree_elt)) {
+		_HA_ATOMIC_DEC(&s->tree_elt->elements);
+
+		/*
+		 * Now lock the existing element, and its target list.
+		 * To prevent a deadlock, we always lock the one
+		 * with the lowest key first.
+		 */
+		if (new_key < s->tree_elt->lb_node.key) {
+			to_unlock = mt_list_lock_full(&s->lb_mt_list);
+			list = pewma_lock_target_list(tree_elt);
+		} else {
+			list = pewma_lock_target_list(tree_elt);
+			to_unlock = mt_list_lock_full(&s->lb_mt_list);
+		}
+
+		/*
+		 * Unlock the old list, the element is now
+		 * no longer in it.
+		 */
+		mt_list_unlock_link(to_unlock);
+	} else
+		list = pewma_lock_target_list(tree_elt);
+
+	/*
+	 * Add the element to the new list, and unlock it.
+	 */
+	mt_list_unlock_full(&s->lb_mt_list, list);
+
+	s->tree_elt = tree_elt;
+
+	HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+
+	if (allocated_elt)
+		s->free_elt = allocated_elt;
+
+	__ha_barrier_store();
+	_HA_ATOMIC_STORE(&s->lb_lock, 0);
+
+	pewma_check_srv_key(s, new_key);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void pewma_set_server_status_down(struct server *srv)
+{
+	struct proxy *p = srv->proxy;
+
+	if (!srv_lb_status_changed(srv))
+		return;
+
+	if (srv_willbe_usable(srv)) /* still usable: only commit the new state */
+		goto out_update_state;
+	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+
+	if (!srv_currently_usable(srv))
+		/* server was already down */
+		goto out_update_backend;
+
+	if (srv->flags & SRV_F_BACKUP) {
+		p->lbprm.tot_wbck -= srv->cur_eweight;
+		p->srv_bck--;
+
+		if (srv == p->lbprm.fbck) {
+			/* we lost the first backup server in a single-backup
+			 * configuration, we must search another one.
+			 */
+			struct server *srv2 = p->lbprm.fbck;
+			do {
+				srv2 = srv2->next;
+			} while (srv2 &&
+				 !((srv2->flags & SRV_F_BACKUP) &&
+				   srv_willbe_usable(srv2)));
+			p->lbprm.fbck = srv2; /* NULL when no other usable backup follows */
+		}
+	} else {
+		p->lbprm.tot_wact -= srv->cur_eweight;
+		p->srv_act--;
+	}
+
+	pewma_dequeue_srv(srv); /* unlink it from its list and tree element */
+	pewma_remove_from_tree(srv); /* clears srv->lb_tree */
+
+out_update_backend:
+	/* check/update tot_used, tot_weight */
+	update_backend_weight(p);
+	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+	srv_lb_commit_status(srv);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already up or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void pewma_set_server_status_up(struct server *srv)
+{
+	struct proxy *p = srv->proxy;
+
+	if (!srv_lb_status_changed(srv))
+		return;
+
+	if (!srv_willbe_usable(srv)) /* still unusable: only commit the new state */
+		goto out_update_state;
+
+	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+	if (srv_currently_usable(srv))
+		/* server was already up */
+		goto out_update_backend;
+
+	if (srv->flags & SRV_F_BACKUP) {
+		srv->lb_tree = &p->lbprm.pewma.bck;
+		p->lbprm.tot_wbck += srv->next_eweight;
+		p->srv_bck++;
+
+		if (!(p->options & PR_O_USE_ALL_BK)) {
+			if (!p->lbprm.fbck) {
+				/* there was no backup server anymore */
+				p->lbprm.fbck = srv;
+			} else {
+				/* we may have restored a backup server prior to fbck,
+				 * in which case it should replace it.
+				 */
+				struct server *srv2 = srv;
+				do {
+					srv2 = srv2->next;
+				} while (srv2 && (srv2 != p->lbprm.fbck));
+				if (srv2) /* fbck was found after <srv> in the list */
+					p->lbprm.fbck = srv;
+			}
+		}
+	} else {
+		srv->lb_tree = &p->lbprm.pewma.act;
+		p->lbprm.tot_wact += srv->next_eweight;
+		p->srv_act++;
+	}
+
+	/* note that eweight cannot be 0 here */
+	pewma_queue_srv(srv, srv->next_eweight);
+
+ out_update_backend:
+	/* check/update tot_used, tot_weight */
+	update_backend_weight(p);
+	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+	srv_lb_commit_status(srv);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void pewma_update_server_weight(struct server *srv)
+{
+	int old_state, new_state;
+	struct proxy *p = srv->proxy;
+
+	if (!srv_lb_status_changed(srv))
+		return;
+
+	/* If changing the server's weight changes its state, we simply apply
+	 * the procedures we already have for status change. If the state
+	 * remains down, the server is not in any tree, so it's as easy as
+	 * updating its values. If the state remains up with different weights,
+	 * there are some computations to perform to find a new place and
+	 * possibly a new tree for this server.
+	 */
+
+	old_state = srv_currently_usable(srv);
+	new_state = srv_willbe_usable(srv);
+
+	if (!old_state && !new_state) {
+		srv_lb_commit_status(srv);
+		return;
+	}
+	else if (!old_state && new_state) {
+		pewma_set_server_status_up(srv);
+		return;
+	}
+	else if (old_state && !new_state) {
+		pewma_set_server_status_down(srv);
+		return;
+	}
+
+	HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); /* state remains up: requeue with the new weight */
+
+	if (srv->lb_tree)
+		pewma_dequeue_srv(srv);
+
+	if (srv->flags & SRV_F_BACKUP) {
+		p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight; /* apply the weight delta */
+		srv->lb_tree = &p->lbprm.pewma.bck;
+	} else {
+		p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight; /* apply the weight delta */
+		srv->lb_tree = &p->lbprm.pewma.act;
+	}
+
+	pewma_queue_srv(srv, srv->next_eweight);
+
+	update_backend_weight(p);
+	HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+	srv_lb_commit_status(srv);
+}
+
+/* This function installs the Peak EWMA callbacks in <p>'s lbprm and builds the
+ * trees. It also sets p->lbprm.wdiv to the eweight to uweight ratio. Both
+ * active and backup groups are initialized.
+ */
+void pewma_init_server_tree(struct proxy *p)
+{
+	struct server *srv;
+	struct eb_root init_head = EB_ROOT;
+
+	p->lbprm.set_server_status_up   = pewma_set_server_status_up;
+	p->lbprm.set_server_status_down = pewma_set_server_status_down;
+	p->lbprm.update_server_eweight  = pewma_update_server_weight;
+	p->lbprm.server_take_conn = pewma_srv_reposition;
+	p->lbprm.server_drop_conn = pewma_srv_reposition;
+	p->lbprm.server_requeue   = pewma_srv_reposition;
+	p->lbprm.server_deinit    = pewma_server_deinit;
+	p->lbprm.proxy_deinit     = pewma_proxy_deinit;
+
+	p->lbprm.wdiv = BE_WEIGHT_SCALE;
+	for (srv = p->srv; srv; srv = srv->next) {
+		srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult; /* rounded up; NOTE(review): wmult must already be set and non-zero - confirm */
+		srv_lb_commit_status(srv);
+	}
+
+	p->lbprm.lb_seq = 0; /* sequence 0, smallest key 0 */
+
+	recount_servers(p);
+	update_backend_weight(p);
+
+	p->lbprm.pewma.act = init_head;
+	p->lbprm.pewma.bck = init_head;
+
+	/* queue active and backup servers in two distinct groups */
+	for (srv = p->srv; srv; srv = srv->next) {
+		if (!srv_currently_usable(srv))
+			continue;
+		srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.pewma.bck : &p->lbprm.pewma.act;
+		pewma_queue_srv(srv, srv->next_eweight);
+	}
+}
+
+/* Return next server from the Peak EWMA tree in backend <p>. If the tree is
+ * empty, return NULL. Saturated servers are skipped, and <srvtoavoid> is only
+ * picked from the tree when no other usable server was found.
+ * The lbprm's lock will be used in R/O mode. The server's lock is not used.
+ */
+struct server *pewma_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+	struct server *srv, *avoided;
+	struct eb32_node *node;
+	uint64_t curseq;
+	int found = 0;
+
+	srv = avoided = NULL;
+
+	HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+	curseq = _HA_ATOMIC_LOAD(&p->lbprm.lb_seq);
+ redo:
+	if (p->srv_act)
+		node = eb32_lookup_ge(&p->lbprm.pewma.act, PEWMA_LBPRM_SMALLEST(curseq));
+	else if (p->lbprm.fbck) { /* no active server: the first backup is returned directly */
+		srv = p->lbprm.fbck;
+		goto out;
+	}
+	else if (p->srv_bck)
+		node = eb32_lookup_ge(&p->lbprm.pewma.bck, PEWMA_LBPRM_SMALLEST(curseq));
+	else {
+		srv = NULL;
+		goto out;
+	}
+
+	while (node) {
+		struct fwlc_tree_elt *tree_elt;
+		struct server *s;
+		int unusable = 0;
+		int orig_nb;
+		int i = 0;
+
+		tree_elt = eb32_entry(node, struct fwlc_tree_elt, lb_node);
+		orig_nb = statistical_prng_range(PEWMA_LISTS_NB);
+
+		while (_HA_ATOMIC_LOAD(&tree_elt->elements) > unusable) {
+			struct mt_list mt_list;
+			mt_list.next = _HA_ATOMIC_LOAD(&tree_elt->srv_list[(i + orig_nb) % PEWMA_LISTS_NB].next);
+
+			if (mt_list.next != &tree_elt->srv_list[(i + orig_nb) % PEWMA_LISTS_NB] && mt_list.next != MT_LIST_BUSY) {
+				unsigned int eweight;
+				unsigned int planned_inflight;
+				unsigned int lat;
+				s = container_of(mt_list.next, struct server, lb_mt_list);
+				eweight = _HA_ATOMIC_LOAD(&s->cur_eweight);
+
+				/* Reverse the key computation (note: with the current latency):
+				 * key = latency * (inflight + 1) * SRV_EWGHT_MAX / eweight, so
+				 * key * eweight / (SRV_EWGHT_MAX * latency) estimates inflight + 1.
+				 */
+				lat = swrate_avg(_HA_ATOMIC_LOAD(&s->counters.t_time), TIME_STATS_SAMPLES);
+				if (!lat) lat = 1;
+				planned_inflight = (unsigned long long)tree_elt->lb_node.key * eweight / ((unsigned long long)SRV_EWGHT_MAX * lat);
+
+				if (!s->maxconn || s->served + s->queueslength < srv_dynamic_maxconn(s) + s->maxqueue) {
+					if (_HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queueslength) > planned_inflight + 2) {
+						/*
+						 * The server has more requests than expected,
+						 * let's try to reposition it, to avoid too
+						 * many threads using the same server at the
+						 * same time. From the moment we release the
+						 * lock, we cannot trust the node nor tree_elt
+						 * anymore, so we need to loop back to the
+						 * beginning.
+						 */
+						if (i >= PEWMA_LISTS_NB) {
+							HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+							pewma_srv_reposition(s);
+							HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+							goto redo;
+						}
+						i++;
+						continue;
+					}
+                                        if (s != srvtoavoid) {
+                                                srv = s;
+                                                found = 1;
+                                                break;
+                                        }
+					avoided = s; /* kept as a last resort, see the end of the function */
+				}
+				unusable++;
+				i++;
+			} else if (mt_list.next == &tree_elt->srv_list[(i + orig_nb) % PEWMA_LISTS_NB]) {
+				i++; /* this list is empty, try the next one */
+				continue;
+			} else {
+				i++; /* this list is busy (MT_LIST_BUSY), try the next one */
+				continue;
+			}
+		}
+		if (found)
+			break;
+
+		do {
+			node = eb32_next(node);
+		} while (node && node->key < PEWMA_LBPRM_SMALLEST(curseq));
+
+		if (node) {
+			uint64_t newcurseq = HA_ATOMIC_LOAD(&p->lbprm.lb_seq);
+
+			/*
+			 * If we have a bigger element than the smallest recorded, and we're up to date,
+			 * update the smallest one.
+			 */
+			if (likely(newcurseq == curseq && PEWMA_LBPRM_SMALLEST(newcurseq) < node->key)) {
+				if (pewma_set_seq_and_smallest(&p->lbprm, curseq, PEWMA_LBPRM_SEQ(curseq), node->key) != 0) {
+					curseq = PEWMA_LBPRM_SEQ(curseq) | ((uint64_t)node->key << 32);
+					__ha_barrier_store();
+					continue;
+				}
+
+			}
+			/*
+			 * Somebody added a new server in node we already skipped, so retry from the beginning.
+			 */
+			if (unlikely(PEWMA_LBPRM_SMALLEST(newcurseq) < node->key && PEWMA_LBPRM_SEQ(newcurseq) != PEWMA_LBPRM_SEQ(curseq))) {
+				curseq = newcurseq;
+				goto redo;
+			}
+			curseq = newcurseq;
+		} else {
+			uint64_t newcurseq = _HA_ATOMIC_LOAD(&p->lbprm.lb_seq);
+
+			/*
+			 * No more node, but somebody changed the tree, so it's
+			 * worth trying again.
+			 */
+			if (PEWMA_LBPRM_SEQ(newcurseq) != PEWMA_LBPRM_SEQ(curseq)) {
+				curseq = newcurseq;
+				goto redo;
+			}
+		}
+	}
+
+	if (!srv)
+		srv = avoided;
+ out:
+	HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+	return srv;
+}
+
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
-- 
2.43.0

From 84e9c1e88bf21df2ca1ef9816ba110677a1ab0ba Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:59:31 +0100
Subject: [PATCH 7/8] MEDIUM: backend: add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 include/haproxy/lb_pewma.h | 40 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 include/haproxy/lb_pewma.h

diff --git a/include/haproxy/lb_pewma.h b/include/haproxy/lb_pewma.h
new file mode 100644
index 0000000000..aad9814c7f
--- /dev/null
+++ b/include/haproxy/lb_pewma.h
@@ -0,0 +1,40 @@
+/*
+ * include/haproxy/lb_pewma.h
+ * Peak EWMA load balancing algorithm.
+ *
+ * Copyright 2026 Aleksandar Lazic <[email protected]>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef _HAPROXY_LB_PEWMA_H
+#define _HAPROXY_LB_PEWMA_H
+
+#include <haproxy/api.h>
+#include <haproxy/lb_pewma-t.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/server-t.h>
+
+void pewma_init_server_tree(struct proxy *p);
+struct server *pewma_get_next_server(struct proxy *p, struct server *srvtoavoid);
+
+#endif /* _HAPROXY_LB_PEWMA_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
-- 
2.43.0

From bd524ccac26ae35567f0a62a145f54ef58ec6936 Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:59:16 +0100
Subject: [PATCH 6/8] MEDIUM/backend: Add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 include/haproxy/lb_pewma-t.h | 39 ++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 include/haproxy/lb_pewma-t.h

diff --git a/include/haproxy/lb_pewma-t.h b/include/haproxy/lb_pewma-t.h
new file mode 100644
index 0000000000..9bef349521
--- /dev/null
+++ b/include/haproxy/lb_pewma-t.h
@@ -0,0 +1,39 @@
+/*
+ * include/haproxy/lb_pewma-t.h
+ * Types for Peak EWMA load balancing algorithm.
+ *
+ * Copyright 2026 Aleksandar Lazic <[email protected]>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef _HAPROXY_LB_PEWMA_T_H
+#define _HAPROXY_LB_PEWMA_T_H
+
+#include <import/ebtree-t.h>
+
+struct lb_pewma {
+	struct eb_root act;	/* Peak EWMA tree holding the active servers */
+	struct eb_root bck;	/* Peak EWMA tree holding the backup servers */
+};
+
+#endif /* _HAPROXY_LB_PEWMA_T_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
-- 
2.43.0

From 0d2f9ebd82d41fbea495e9d858cff4038d198548 Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:58:58 +0100
Subject: [PATCH 5/8] MEDIUM/backend: Add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 src/cfgparse.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/cfgparse.c b/src/cfgparse.c
index 52a4cb8fe0..3a6846c531 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -66,6 +66,7 @@
 #include <haproxy/lb_chash.h>
 #include <haproxy/lb_fas.h>
 #include <haproxy/lb_fwlc.h>
+#include <haproxy/lb_pewma.h>
 #include <haproxy/lb_fwrr.h>
 #include <haproxy/lb_map.h>
 #include <haproxy/lb_ss.h>
@@ -3413,6 +3414,9 @@ int check_config_validity()
 			if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_CB_LC) {
 				curproxy->lbprm.algo |= BE_LB_LKUP_LCTREE | BE_LB_PROP_DYN;
 				fwlc_init_server_tree(curproxy);
+			} else if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_CB_PE) {
+				curproxy->lbprm.algo |= BE_LB_LKUP_PETREE | BE_LB_PROP_DYN;
+				pewma_init_server_tree(curproxy);
 			} else {
 				curproxy->lbprm.algo |= BE_LB_LKUP_FSTREE | BE_LB_PROP_DYN;
 				fas_init_server_tree(curproxy);
-- 
2.43.0

From 816700e686bb173886e6ef4de7105e945bdf3e04 Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:58:43 +0100
Subject: [PATCH 4/8] MEDIUM/backend: Add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 src/backend.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/backend.c b/src/backend.c
index 73b39306b9..47ea1c1b1e 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -38,6 +38,7 @@
 #include <haproxy/lb_chash.h>
 #include <haproxy/lb_fas.h>
 #include <haproxy/lb_fwlc.h>
+#include <haproxy/lb_pewma.h>
 #include <haproxy/lb_fwrr.h>
 #include <haproxy/lb_map.h>
 #include <haproxy/lb_ss.h>
@@ -714,6 +715,10 @@ int assign_server(struct stream *s)
 			srv = fwlc_get_next_server(s->be, prev_srv);
 			break;
 
+		case BE_LB_LKUP_PETREE:
+			srv = pewma_get_next_server(s->be, prev_srv);
+			break;
+
 		case BE_LB_LKUP_CHTREE:
 		case BE_LB_LKUP_MAP:
 			if ((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_RR) {
@@ -3070,6 +3075,8 @@ const char *backend_lb_algo_str(int algo) {
 		return "first";
 	else if (algo == BE_LB_ALGO_LC)
 		return "leastconn";
+	else if (algo == BE_LB_ALGO_PE)
+		return "peak-ewma";
 	else if (algo == BE_LB_ALGO_SH)
 		return "source";
 	else if (algo == BE_LB_ALGO_UH)
@@ -3120,6 +3127,10 @@ int backend_parse_balance(const char **args, char **err, struct proxy *curproxy)
 		curproxy->lbprm.algo &= ~BE_LB_ALGO;
 		curproxy->lbprm.algo |= BE_LB_ALGO_LC;
 	}
+	else if (strcmp(args[0], "peak-ewma") == 0) {
+		curproxy->lbprm.algo &= ~BE_LB_ALGO;
+		curproxy->lbprm.algo |= BE_LB_ALGO_PE;
+	}
 	else if (!strncmp(args[0], "random", 6)) {
 		curproxy->lbprm.algo &= ~BE_LB_ALGO;
 		curproxy->lbprm.algo |= BE_LB_ALGO_RND;
@@ -3300,7 +3311,7 @@ int backend_parse_balance(const char **args, char **err, struct proxy *curproxy)
 		curproxy->lbprm.algo |= BE_LB_ALGO_SS;
 	}
 	else {
-		memprintf(err, "only supports 'roundrobin', 'static-rr', 'leastconn', 'source', 'uri', 'url_param', 'hash', 'hdr(name)', 'rdp-cookie(name)', 'log-hash' and 'sticky' options.");
+		memprintf(err, "only supports 'roundrobin', 'static-rr', 'leastconn', 'peak-ewma', 'source', 'uri', 'url_param', 'hash', 'hdr(name)', 'rdp-cookie(name)', 'log-hash' and 'sticky' options.");
 		return -1;
 	}
 	return 0;
-- 
2.43.0

From ef07ec76a4eccae9e904234778e9fb8b22ebc0d8 Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:58:24 +0100
Subject: [PATCH 3/8] MEDIUM/backend: Add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 include/haproxy/defaults.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h
index 04eae5df06..de855b50e7 100644
--- a/include/haproxy/defaults.h
+++ b/include/haproxy/defaults.h
@@ -661,6 +661,18 @@
 #define FWLC_MIN_FREE_ENTRIES 500
 #endif /* FWLC_MIN_FREE_ENTRIES */
 
+/*
+ * Peak EWMA tree constants, mirroring the FWLC ones but kept separate for
+ * independent tuning. Each one may be overridden at build time (#ifndef).
+ */
+#ifndef PEWMA_LISTS_NB
+#define PEWMA_LISTS_NB   4
+#endif /* PEWMA_LISTS_NB */
+
+#ifndef PEWMA_MIN_FREE_ENTRIES
+#define PEWMA_MIN_FREE_ENTRIES 500
+#endif /* PEWMA_MIN_FREE_ENTRIES */
+
 /*
  * QUIC
  */
-- 
2.43.0

From ab0fa769d7ba17d97adfc168a5a4abbc3f952a2e Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:58:00 +0100
Subject: [PATCH 2/8] MEDIUM/backend: Add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 include/haproxy/backend-t.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/haproxy/backend-t.h b/include/haproxy/backend-t.h
index 35f882f28a..9bea12e48c 100644
--- a/include/haproxy/backend-t.h
+++ b/include/haproxy/backend-t.h
@@ -26,6 +26,7 @@
 #include <haproxy/lb_chash-t.h>
 #include <haproxy/lb_fas-t.h>
 #include <haproxy/lb_fwlc-t.h>
+#include <haproxy/lb_pewma-t.h>
 #include <haproxy/lb_fwrr-t.h>
 #include <haproxy/lb_map-t.h>
 #include <haproxy/lb_ss-t.h>
@@ -58,6 +59,7 @@
 /* BE_LB_CB_* is used with BE_LB_KIND_CB */
 #define BE_LB_CB_LC     0x00000000  /* least-connections */
 #define BE_LB_CB_FAS    0x00000001  /* first available server (opposite of leastconn) */
+#define BE_LB_CB_PE     0x00000002  /* peak ewma */
 
 /* BE_LB_SA_* is used with BE_LB_KIND_SA */
 #define BE_LB_SA_SS     0x00000000  /* stick to server as long as it is available */
@@ -88,6 +90,7 @@
 #define BE_LB_ALGO_RND  (BE_LB_KIND_RR | BE_LB_NEED_NONE | BE_LB_RR_RANDOM) /* random value */
 #define BE_LB_ALGO_LC   (BE_LB_KIND_CB | BE_LB_NEED_NONE | BE_LB_CB_LC)    /* least connections */
 #define BE_LB_ALGO_FAS  (BE_LB_KIND_CB | BE_LB_NEED_NONE | BE_LB_CB_FAS)   /* first available server */
+#define BE_LB_ALGO_PE   (BE_LB_KIND_CB | BE_LB_NEED_NONE | BE_LB_CB_PE)    /* peak ewma */
 #define BE_LB_ALGO_SS   (BE_LB_KIND_SA | BE_LB_NEED_NONE | BE_LB_SA_SS)    /* sticky */
 #define BE_LB_ALGO_SRR  (BE_LB_KIND_RR | BE_LB_NEED_NONE | BE_LB_RR_STATIC) /* static round robin */
 #define BE_LB_ALGO_SH	(BE_LB_KIND_HI | BE_LB_NEED_ADDR | BE_LB_HASH_SRC) /* hash: source IP */
@@ -109,6 +112,7 @@
 #define BE_LB_LKUP_LCTREE 0x00300000  /* FWLC tree lookup */
 #define BE_LB_LKUP_CHTREE 0x00400000  /* consistent hash  */
 #define BE_LB_LKUP_FSTREE 0x00500000  /* FAS tree lookup */
+#define BE_LB_LKUP_PETREE 0x00600000  /* Peak EWMA tree lookup */
 #define BE_LB_LKUP        0x00700000  /* mask to get just the LKUP value */
 
 /* additional properties */
@@ -158,6 +162,7 @@ struct lbprm {
 		struct lb_fwlc fwlc;
 		struct lb_chash chash;
 		struct lb_fas fas;
+		struct lb_pewma pewma;
 		struct lb_ss ss;
 	};
 	uint32_t algo;			/* load balancing algorithm and variants: BE_LB_* */
-- 
2.43.0

From 1e67db8482e8657c007223f0f31a6648d8a9c17a Mon Sep 17 00:00:00 2001
From: Aleksandar Lazic <[email protected]>
Date: Fri, 6 Feb 2026 00:54:58 +0100
Subject: [PATCH 1/8] MEDIUM/backend: Add Peak EWMA load balancing algorithm

refer to https://github.com/haproxy/haproxy/issues/1570

Signed-off-by: Aleksandar Lazic <[email protected]>
---
 Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 006875048c..0415c7b86e 100644
--- a/Makefile
+++ b/Makefile
@@ -989,7 +989,8 @@ OBJS += src/mux_h2.o src/mux_h1.o src/mux_fcgi.o src/log.o		\
         src/raw_sock.o src/action.o src/stats-file.o src/buf.o		\
         src/xprt_handshake.o src/proto_uxst.o src/lb_fwrr.o		\
         src/uri_normalizer.o src/mailers.o src/protocol.o		\
-        src/cfgcond.o src/proto_udp.o src/lb_fwlc.o src/ebmbtree.o	\
+        src/cfgcond.o src/proto_udp.o src/lb_fwlc.o src/lb_pewma.o	\
+        src/ebmbtree.o	\
         src/proto_uxdg.o src/cfgdiag.o src/sock_unix.o src/sha1.o	\
         src/lb_fas.o src/clock.o src/sock_inet.o src/ev_select.o	\
         src/lb_map.o src/shctx.o src/hpack-dec.o src/net_helper.o       \
-- 
2.43.0

Patch series for New Balancing algorithm (Peak) EWMA

Reply via email to