On Tue, 2020-02-25 at 11:47 +0100, Willy Tarreau wrote:
[...]
> > The patch was written under the assumption that you wanted exactly
> > the same behavior.
> 
> Yes that's preferable indeed.
> 
> > Since you say we maybe can drop them, I have created a
> > new patch which should work fine but does not guarantee that
> > automatic
> > ids form a gapless sequence from 0. This simplifies the algorithm
> > significantly. The downside is that an overflow can happen if a
> > very
> > high -u number is used in the config file, so I added a check for
> > that.
> > The maximum is still very high.
> 
> The problem now becomes a matter of backwards compatibility as it
> will refuse to start on some configs otherwise valid on older
> versions.
> I'd rather not do this. We could have simply detected the overflow in
> the automatic assignment but I'd rather keep the feature even if I
> don't use it myself. I've looked at the doc and figured what it's
> used for. It's for when you want to replace ACL or map values from
> the CLI. There is not always a unique reference file name to work
> with, but there's a unique ID that can be used prefixed by a '#'.
> It's possible that some infrastructures automatically update usage
> thresholds from the CLI using this, for example.
> 
> > > I don't know how the current code deals with duplicated IDs nor
> > > what impact these might have.
> > > 
> > 
> > Duplicate IDs are handled when the configuration file is read. That
> > code is not efficient either but I did not look at improving that
> > because I do not know if anyone uses this feature *and* use very
> > large configuration files. For small hand written files the code is
> > fine.
> 
> Agreed. I've seen the pat_ref_lookupid() which does linear search.
> I'm not that much worried by this considering that you managed to
> significantly shrink the startup time in your case already.
> 
> Well, after having looked at it more closely, I'd still rather keep
> the hole-filling algorithm, so if you have a v3 between the two
> versions it would be great :-)

Do you mean just with an exit code? And maybe change
sizeof(struct pat_ref *) to sizeof(arr[0]) - if this is safe even when
arr is of length 0?


> Thanks!
> Willy
From 7f1d255901fb4ed6252154a8d14775180835088f Mon Sep 17 00:00:00 2001
From: Carl Henrik Lunde <[email protected]>
Date: Thu, 27 Feb 2020 15:30:13 +0100
Subject: [PATCH] OPTIM: startup: fast unique_id allocation for acl.

pattern_finalize_config() uses an inefficient algorithm which is a
problem with very large configuration files. This affects startup, and
therefore reload time. When haproxy is deployed as a router in a
Kubernetes cluster the generated configuration file may be large and
reloads are frequently occurring, which makes this a significant issue.

The old algorithm is O(n^2)
* allocate missing uids - O(n^2)
* sort linked list - O(n^2)

The new algorithm is O(n log n):
* find the user allocated uids - O(n)
* store them for efficient lookup - O(n log n)
* allocate missing uids - n times O(log n)
* sort all uids - O(n log n)
* convert back to linked list - O(n)

Performance examples, startup time in seconds:

    pat_refs old     new
    1000      0.02   0.01
    10000     2.1    0.04
    20000    12.3    0.07
    30000    27.9    0.10
    40000    52.5    0.14
    50000    77.5    0.17

Please backport to 1.8 and 2.0.
---
 include/proto/pattern.h |  2 +-
 src/haproxy.c           |  6 ++-
 src/pattern.c           | 89 +++++++++++++++++++++++++++--------------
 3 files changed, 64 insertions(+), 33 deletions(-)

diff --git a/include/proto/pattern.h b/include/proto/pattern.h
index 5b9929614..73d3cdcf3 100644
--- a/include/proto/pattern.h
+++ b/include/proto/pattern.h
@@ -37,7 +37,7 @@ extern void (*pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *);
 extern struct pattern *(*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int);
 extern int pat_match_types[PAT_MATCH_NUM];
 
-void pattern_finalize_config(void);
+int pattern_finalize_config(void);
 
 /* return the PAT_MATCH_* index for match name "name", or < 0 if not found */
 static inline int pat_find_match_name(const char *name)
diff --git a/src/haproxy.c b/src/haproxy.c
index f04ccea6e..25a4328cb 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -1898,7 +1898,11 @@ static void init(int argc, char **argv)
 		exit(1);
 	}
 
-	pattern_finalize_config();
+	err_code |= pattern_finalize_config();
+	if (err_code & (ERR_ABORT|ERR_FATAL)) {
+		ha_alert("Failed to finalize pattern config.\n");
+		exit(1);
+	}
 
 	/* recompute the amount of per-process memory depending on nbproc and
 	 * the shared SSL cache size (allowed to exist in all processes).
diff --git a/src/pattern.c b/src/pattern.c
index 90067cd23..6f9ffcdc5 100644
--- a/src/pattern.c
+++ b/src/pattern.c
@@ -2643,53 +2643,80 @@ int pattern_delete(struct pattern_expr *expr, struct pat_ref_elt *ref)
 	return 1;
 }
 
-/* This function finalize the configuration parsing. Its set all the
+/* This function compares two pat_ref** on unique_id */
+static int cmp_pat_ref(const void *_a, const void *_b)
+{
+	struct pat_ref * const *a = _a;
+	struct pat_ref * const *b = _b;
+
+	if ((*a)->unique_id < (*b)->unique_id)
+		return -1;
+	else if ((*a)->unique_id > (*b)->unique_id)
+		return 1;
+	return 0;
+}
+
+/* This function finalizes the configuration parsing. It sets all the
  * automatic ids
  */
-void pattern_finalize_config(void)
+int pattern_finalize_config(void)
 {
-	int i = 0;
-	struct pat_ref *ref, *ref2, *ref3;
+	int len = 0;
+	int unassigned_pos = 0;
+	int next_unique_id = 0;
+	int i, j;
+	struct pat_ref *ref, **arr;
 	struct list pr = LIST_HEAD_INIT(pr);
 
 	pat_lru_seed = random();
 
+	/* Count pat_refs with user-defined unique_id and total count */
 	list_for_each_entry(ref, &pattern_reference, list) {
-		if (ref->unique_id == -1) {
-			/* Look for the first free id. */
-			while (1) {
-				list_for_each_entry(ref2, &pattern_reference, list) {
-					if (ref2->unique_id == i) {
-						i++;
-						break;
-					}
-				}
-				if (&ref2->list == &pattern_reference)
-					break;
-			}
+		len++;
+		if (ref->unique_id != -1)
+			unassigned_pos++;
+	}
 
-			/* Uses the unique id and increment it for the next entry. */
-			ref->unique_id = i;
-			i++;
-		}
+	arr = calloc(len, sizeof(struct pat_ref *));
+	if (arr == NULL) {
+		ha_alert("Out of memory error.\n");
+		return ERR_ALERT | ERR_FATAL;
 	}
 
-	/* This sort the reference list by id. */
-	list_for_each_entry_safe(ref, ref2, &pattern_reference, list) {
-		LIST_DEL(&ref->list);
-		list_for_each_entry(ref3, &pr, list) {
-			if (ref->unique_id < ref3->unique_id) {
-				LIST_ADDQ(&ref3->list, &ref->list);
-				break;
-			}
-		}
-		if (&ref3->list == &pr)
-			LIST_ADDQ(&pr, &ref->list);
+	i = 0;
+	j = unassigned_pos;
+	list_for_each_entry(ref, &pattern_reference, list) {
+		if (ref->unique_id != -1)
+			arr[i++] = ref;
+		else
+			arr[j++] = ref;
+	}
+
+	/* Sort first segment of array with user-defined unique ids for
+	 * fast lookup when generating unique ids
+	 */
+	qsort(arr, unassigned_pos, sizeof(arr[0]), cmp_pat_ref);
+
+	/* Assign unique ids to the rest of the elements */
+	for (i = unassigned_pos; i < len; i++) {
+		do {
+			arr[i]->unique_id = next_unique_id++;
+		} while (bsearch(&arr[i], arr, unassigned_pos, sizeof(arr[0]), cmp_pat_ref));
 	}
 
+	/* Sort complete array */
+	qsort(arr, len, sizeof(arr[0]), cmp_pat_ref);
+
+	/* Convert back to linked list */
+	for (i = 0; i < len; i++)
+		LIST_ADDQ(&pr, &arr[i]->list);
+
 	/* swap root */
 	LIST_ADD(&pr, &pattern_reference);
 	LIST_DEL(&pr);
+
+	free(arr);
+	return 0;
 }
 
 static int pattern_per_thread_lru_alloc()
-- 
2.18.2

Reply via email to