Prefix lists were implemented with a simple linear list that is scanned
sequentially. This is, of course, extremely inefficient as it scales by
O(n). This patch adds a trie-ish data structure that allows quickly
descending based on the prefix.
Note that the trie structure used here is designed for real-world use,
hence it uses a relatively crude fixed-size bytewise table instead of
some fancy balancing scheme. It is quite cacheline efficient.
Using real-world routeserver prefix lists, matching against a fulltable
dump:
entries before after factor
9103 63.8s .0124s 5142x
772 4.52s .0101s 445.3x
86 .445s .0098s 45.51x
7 .0379s .0099s 3.834x
2 .0136s .0095s 1.440x
1 .0084s .0095s .879x
This buys CPU with memory. Memory usage on an IXP setup with 100k
prefix list entries is an additional 4 MB on top of the 9.5 MB that it
was before.
---
Not-Signed-off-by: David Lamparter <[email protected]>
This patch needs more documentation/comments; just mailing this out for
people to scroll through and voice high-level comments. Also, the test
tools aren't cleaned up yet.
---
lib/memtypes.c | 1 +
lib/plist.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
lib/plist_int.h | 7 ++
3 files changed, 231 insertions(+), 6 deletions(-)
diff --git a/lib/memtypes.c b/lib/memtypes.c
index 1a0c11f..a5b76cc 100644
--- a/lib/memtypes.c
+++ b/lib/memtypes.c
@@ -48,6 +48,7 @@ struct memory_list memory_list_lib[] =
{ MTYPE_PREFIX_LIST, "Prefix List" },
{ MTYPE_PREFIX_LIST_ENTRY, "Prefix List Entry" },
{ MTYPE_PREFIX_LIST_STR, "Prefix List Str" },
+ { MTYPE_PREFIX_LIST_TRIE, "Prefix List Trie Table" },
{ MTYPE_ROUTE_MAP, "Route map" },
{ MTYPE_ROUTE_MAP_NAME, "Route map name" },
{ MTYPE_ROUTE_MAP_INDEX, "Route map index" },
diff --git a/lib/plist.c b/lib/plist.c
index d8700c8..1181df1 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -32,6 +32,26 @@
#include "plist_int.h"
+/* not currently changeable, code assumes bytes further down */
+#define PLC_BITS 8
+#define PLC_LEN (1 << PLC_BITS)
+#define PLC_MAXLEVELV4 2 /* /24 for IPv4 */
+#define PLC_MAXLEVELV6 4 /* /48 for IPv6 */
+#define PLC_MAXLEVEL 4 /* max(v4,v6) */
+
+struct pltrie_entry {
+ union {
+ struct pltrie_table *next_table;
+ struct prefix_list_entry *final_chain;
+ };
+
+ struct prefix_list_entry *up_chain;
+};
+
+struct pltrie_table {
+ struct pltrie_entry entries[PLC_LEN];
+};
+
/* List of struct prefix_list. */
struct prefix_list_list
{
@@ -59,6 +79,9 @@ struct prefix_master
/* Hook function which is executed when prefix_list is deleted. */
void (*delete_hook) (struct prefix_list *);
+
+ /* number of bytes that have a trie level */
+ size_t trie_depth;
};
/* Static structure of IPv4 prefix_list's master. */
@@ -69,6 +92,8 @@ static struct prefix_master prefix_master_ipv4 =
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV4,
};
#ifdef HAVE_IPV6
@@ -80,27 +105,33 @@ static struct prefix_master prefix_master_ipv6 =
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV6,
};
#endif /* HAVE_IPV6*/
/* Static structure of BGP ORF prefix_list's master. */
-static struct prefix_master prefix_master_orf_v4 =
-{
+static struct prefix_master prefix_master_orf_v4 =
+{
{NULL, NULL},
{NULL, NULL},
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV4,
};
/* Static structure of BGP ORF prefix_list's master. */
-static struct prefix_master prefix_master_orf_v6 =
-{
+static struct prefix_master prefix_master_orf_v6 =
+{
{NULL, NULL},
{NULL, NULL},
1,
NULL,
NULL,
+ NULL,
+ PLC_MAXLEVELV6,
};
static struct prefix_master *
@@ -205,6 +236,7 @@ prefix_list_insert (afi_t afi, int orf, const char *name)
plist = prefix_list_new ();
plist->name = XSTRDUP (MTYPE_PREFIX_LIST_STR, name);
plist->master = master;
+ plist->trie = XCALLOC (MTYPE_PREFIX_LIST_TRIE, sizeof (struct pltrie_table));
/* If name is made by all digit character. We treat it as
number. */
@@ -332,7 +364,9 @@ prefix_list_delete (struct prefix_list *plist)
if (plist->name)
XFREE (MTYPE_PREFIX_LIST_STR, plist->name);
-
+
+ XFREE (MTYPE_PREFIX_LIST_TRIE, plist->trie);
+
prefix_list_free (plist);
if (master->delete_hook)
@@ -436,10 +470,87 @@ prefix_list_entry_lookup (struct prefix_list *plist,
struct prefix *prefix,
}
static void
+trie_walk_affected (size_t validbits, struct pltrie_table *table, uint8_t byte,
+ struct prefix_list_entry *object,
+ void (*fn)(struct prefix_list_entry *object,
+ struct prefix_list_entry **updptr))
+{
+ uint8_t mask;
+ uint16_t bwalk;
+
+ if (validbits > PLC_BITS)
+ {
+ fn (object, &table->entries[byte].final_chain);
+ return;
+ }
+
+ mask = (1 << (8 - validbits)) - 1;
+ for (bwalk = byte & ~mask; bwalk <= byte + mask; bwalk++)
+ {
+ fn (object, &table->entries[bwalk].up_chain);
+ }
+}
+
+static void trie_uninstall_fn (struct prefix_list_entry *object,
+ struct prefix_list_entry **updptr)
+{
+ for (; *updptr; updptr = &(*updptr)->next_best)
+ if (*updptr == object)
+ {
+ *updptr = object->next_best;
+ break;
+ }
+}
+
+static int
+trie_table_empty (struct pltrie_table *table)
+{
+ size_t i;
+ for (i = 0; i < PLC_LEN; i++)
+ if (table->entries[i].next_table || table->entries[i].final_chain)
+ return 0;
+ return 1;
+}
+
+static void
+prefix_list_trie_del (struct prefix_list *plist,
+ struct prefix_list_entry *pentry)
+{
+ size_t depth, maxdepth = plist->master->trie_depth;
+ uint8_t *bytes = &pentry->prefix.u.prefix;
+ size_t validbits = pentry->prefix.prefixlen;
+ struct pltrie_table *table, **tables[PLC_MAXLEVEL];
+
+ table = plist->trie;
+ for (depth = 0; validbits > PLC_BITS && depth < maxdepth - 1; depth++)
+ {
+ uint8_t byte = bytes[depth];
+ assert (table->entries[byte].next_table);
+
+ tables[depth + 1] = &table->entries[byte].next_table;
+ table = table->entries[byte].next_table;
+
+ validbits -= PLC_BITS;
+ }
+
+ trie_walk_affected (validbits, table, bytes[depth], pentry,
trie_uninstall_fn);
+
+ for (; depth > 0; depth--)
+ if (trie_table_empty (*tables[depth]))
+ {
+ XFREE (MTYPE_PREFIX_LIST_TRIE, *tables[depth]);
+ *tables[depth] = NULL;
+ }
+}
+
+
+static void
prefix_list_entry_delete (struct prefix_list *plist,
struct prefix_list_entry *pentry,
int update_list)
{
+ prefix_list_trie_del (plist, pentry);
+
if (plist == NULL || pentry == NULL)
return;
if (pentry->prev)
@@ -467,6 +578,52 @@ prefix_list_entry_delete (struct prefix_list *plist,
}
}
+static void trie_install_fn (struct prefix_list_entry *object,
+ struct prefix_list_entry **updptr)
+{
+ while (*updptr)
+ {
+ if (*updptr == object)
+ return;
+ if ((*updptr)->prefix.prefixlen < object->prefix.prefixlen)
+ break;
+ if ((*updptr)->seq > object->seq)
+ break;
+ updptr = &(*updptr)->next_best;
+ }
+
+ if (!object->next_best)
+ object->next_best = *updptr;
+ else
+ assert (object->next_best == *updptr || !*updptr);
+
+ *updptr = object;
+}
+
+static void
+prefix_list_trie_add (struct prefix_list *plist,
+ struct prefix_list_entry *pentry)
+{
+ size_t depth = plist->master->trie_depth;
+ uint8_t *bytes = &pentry->prefix.u.prefix;
+ size_t validbits = pentry->prefix.prefixlen;
+ struct pltrie_table *table;
+
+ table = plist->trie;
+ while (validbits > PLC_BITS && depth > 1)
+ {
+ if (!table->entries[*bytes].next_table)
+ table->entries[*bytes].next_table = XCALLOC (MTYPE_PREFIX_LIST_TRIE,
+ sizeof(struct pltrie_table));
+ table = table->entries[*bytes].next_table;
+ bytes++;
+ depth--;
+ validbits -= PLC_BITS;
+ }
+
+ trie_walk_affected (validbits, table, *bytes, pentry, trie_install_fn);
+}
+
static void
prefix_list_entry_add (struct prefix_list *plist,
struct prefix_list_entry *pentry)
@@ -512,6 +669,8 @@ prefix_list_entry_add (struct prefix_list *plist,
plist->tail = pentry;
}
+ prefix_list_trie_add (plist, pentry);
+
/* Increment count. */
plist->count++;
@@ -566,7 +725,7 @@ prefix_list_entry_match (struct prefix_list_entry *pentry,
struct prefix *p)
}
enum prefix_list_type
-prefix_list_apply (struct prefix_list *plist, void *object)
+prefix_list_apply_old (struct prefix_list *plist, void *object)
{
struct prefix_list_entry *pentry;
struct prefix *p;
@@ -592,6 +751,64 @@ prefix_list_apply (struct prefix_list *plist, void *object)
return PREFIX_DENY;
}
+enum prefix_list_type
+prefix_list_apply (struct prefix_list *plist, void *object)
+{
+ struct prefix_list_entry *pentry, *pbest = NULL;
+
+ struct prefix *p = (struct prefix *) object;
+ uint8_t *byte = &p->u.prefix;
+ size_t depth = plist->master->trie_depth;
+ size_t validbits = p->prefixlen;
+ struct pltrie_table *table;
+
+ if (plist == NULL)
+ return PREFIX_DENY;
+
+ if (plist->count == 0)
+ return PREFIX_PERMIT;
+
+ table = plist->trie;
+ while (1)
+ {
+ for (pentry = table->entries[*byte].up_chain; pentry; pentry =
pentry->next_best)
+ {
+ if (pbest && pbest->seq < pentry->seq)
+ continue;
+ if (prefix_list_entry_match (pentry, p))
+ pbest = pentry;
+ }
+
+ if (validbits <= PLC_BITS)
+ break;
+ validbits -= PLC_BITS;
+
+ if (--depth)
+ {
+ if (!table->entries[*byte].next_table)
+ break;
+
+ table = table->entries[*byte].next_table;
+ byte++;
+ continue;
+ }
+
+ for (pentry = table->entries[*byte].final_chain; pentry; pentry =
pentry->next_best)
+ {
+ if (pbest && pbest->seq < pentry->seq)
+ continue;
+ if (prefix_list_entry_match (pentry, p))
+ pbest = pentry;
+ }
+ break;
+ }
+
+ if (pbest == NULL)
+ return PREFIX_DENY;
+
+ return pbest->type;
+}
+
static void __attribute__ ((unused))
prefix_list_print (struct prefix_list *plist)
{
diff --git a/lib/plist_int.h b/lib/plist_int.h
index 6459579..e6e5901 100644
--- a/lib/plist_int.h
+++ b/lib/plist_int.h
@@ -29,6 +29,8 @@ enum prefix_name_type
PREFIX_TYPE_NUMBER
};
+struct pltrie_table;
+
struct prefix_list
{
char *name;
@@ -44,6 +46,8 @@ struct prefix_list
struct prefix_list_entry *head;
struct prefix_list_entry *tail;
+ struct pltrie_table *trie;
+
struct prefix_list *next;
struct prefix_list *prev;
};
@@ -66,6 +70,9 @@ struct prefix_list_entry
struct prefix_list_entry *next;
struct prefix_list_entry *prev;
+
+ /* up the chain for best match search */
+ struct prefix_list_entry *next_best;
};
#endif /* _QUAGGA_PLIST_INT_H */
--
2.0.4
_______________________________________________
Quagga-dev mailing list
[email protected]
https://lists.quagga.net/mailman/listinfo/quagga-dev