Module Name: src
Committed By: snj
Date: Sun Dec 18 07:40:50 UTC 2016
Modified Files:
src/sys/modules/npf [netbsd-7]: Makefile
src/sys/net/npf [netbsd-7]: files.npf npf_impl.h npf_tableset.c
src/sys/rump/net/lib/libnpf [netbsd-7]: Makefile
Added Files:
src/sys/net/npf [netbsd-7]: lpm.c lpm.h
Removed Files:
src/sys/net/npf [netbsd-7]: npf_tableset_ptree.c
Log Message:
Pull up following revision(s) (requested by rmind in ticket #1319):
sys/modules/npf/Makefile: revision 1.19
sys/net/npf/files.npf: revision 1.18
sys/net/npf/lpm.c: revision 1.1
sys/net/npf/lpm.h: revision 1.1
sys/net/npf/npf_impl.h: revision 1.62
sys/net/npf/npf_tableset.c: revision 1.24
sys/net/npf/npf_tableset_ptree.c: file removal
sys/rump/net/lib/libnpf/Makefile: revision 1.18
This patches ditches the ptree(3) library, because it is broken (you
can get missing entries!). Instead, as a temporary solution, we switch
to a simple linear scan of the hash tables for the longest-prefix-match
(lpm.c lpm.h) algorithm. In fact, with few unique prefixes in the set,
on modern hardware this simple algorithm is pretty fast anyway!
--
ditch ptree and use lpm
--
remove ptree add lpm
To generate a diff of this commit:
cvs rdiff -u -r1.17 -r1.17.2.1 src/sys/modules/npf/Makefile
cvs rdiff -u -r1.17 -r1.17.2.1 src/sys/net/npf/files.npf
cvs rdiff -u -r0 -r1.1.2.2 src/sys/net/npf/lpm.c src/sys/net/npf/lpm.h
cvs rdiff -u -r1.58.2.3 -r1.58.2.4 src/sys/net/npf/npf_impl.h
cvs rdiff -u -r1.22 -r1.22.2.1 src/sys/net/npf/npf_tableset.c
cvs rdiff -u -r1.1 -r0 src/sys/net/npf/npf_tableset_ptree.c
cvs rdiff -u -r1.14 -r1.14.2.1 src/sys/rump/net/lib/libnpf/Makefile
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/modules/npf/Makefile
diff -u src/sys/modules/npf/Makefile:1.17 src/sys/modules/npf/Makefile:1.17.2.1
--- src/sys/modules/npf/Makefile:1.17 Sat Jul 19 18:24:17 2014
+++ src/sys/modules/npf/Makefile Sun Dec 18 07:40:50 2016
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.17 2014/07/19 18:24:17 rmind Exp $
+# $NetBSD: Makefile,v 1.17.2.1 2016/12/18 07:40:50 snj Exp $
#
# Public Domain.
#
@@ -13,7 +13,7 @@ SRCS= npf.c npf_alg.c npf_conf.c npf_ct
SRCS+= npf_bpf.c npf_if.c npf_inet.c npf_mbuf.c npf_nat.c
SRCS+= npf_ruleset.c npf_conn.c npf_conndb.c npf_rproc.c
SRCS+= npf_state.c npf_state_tcp.c npf_tableset.c
-SRCS+= npf_tableset_ptree.c npf_sendpkt.c npf_worker.c
+SRCS+= lpm.c npf_sendpkt.c npf_worker.c
CPPFLAGS+= -DINET6
Index: src/sys/net/npf/files.npf
diff -u src/sys/net/npf/files.npf:1.17 src/sys/net/npf/files.npf:1.17.2.1
--- src/sys/net/npf/files.npf:1.17 Sat Jul 19 18:24:16 2014
+++ src/sys/net/npf/files.npf Sun Dec 18 07:40:50 2016
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.17 2014/07/19 18:24:16 rmind Exp $
+# $NetBSD: files.npf,v 1.17.2.1 2016/12/18 07:40:50 snj Exp $
#
# Public Domain.
#
@@ -19,7 +19,6 @@ file net/npf/npf_bpf.c npf
file net/npf/npf_ruleset.c npf
file net/npf/npf_rproc.c npf
file net/npf/npf_tableset.c npf
-file net/npf/npf_tableset_ptree.c npf
file net/npf/npf_if.c npf
file net/npf/npf_inet.c npf
file net/npf/npf_conn.c npf
@@ -31,6 +30,9 @@ file net/npf/npf_alg.c npf
file net/npf/npf_sendpkt.c npf
file net/npf/npf_worker.c npf
+# LPM
+file net/npf/lpm.c npf
+
# Built-in extensions.
file net/npf/npf_ext_log.c npf
file net/npf/npf_ext_normalize.c npf
Index: src/sys/net/npf/npf_impl.h
diff -u src/sys/net/npf/npf_impl.h:1.58.2.3 src/sys/net/npf/npf_impl.h:1.58.2.4
--- src/sys/net/npf/npf_impl.h:1.58.2.3 Wed Feb 4 07:13:04 2015
+++ src/sys/net/npf/npf_impl.h Sun Dec 18 07:40:50 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: npf_impl.h,v 1.58.2.3 2015/02/04 07:13:04 snj Exp $ */
+/* $NetBSD: npf_impl.h,v 1.58.2.4 2016/12/18 07:40:50 snj Exp $ */
/*-
* Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -49,7 +49,6 @@
#include <sys/types.h>
#include <sys/queue.h>
-#include <sys/ptree.h>
#include <net/bpf.h>
#include <net/bpfjit.h>
@@ -228,8 +227,6 @@ bool npf_bpf_validate(const void *, siz
void npf_tableset_sysinit(void);
void npf_tableset_sysfini(void);
-extern const pt_tree_ops_t npf_table_ptree_ops;
-
npf_tableset_t *npf_tableset_create(u_int);
void npf_tableset_destroy(npf_tableset_t *);
int npf_tableset_insert(npf_tableset_t *, npf_table_t *);
Index: src/sys/net/npf/npf_tableset.c
diff -u src/sys/net/npf/npf_tableset.c:1.22 src/sys/net/npf/npf_tableset.c:1.22.2.1
--- src/sys/net/npf/npf_tableset.c:1.22 Mon Aug 11 01:54:12 2014
+++ src/sys/net/npf/npf_tableset.c Sun Dec 18 07:40:50 2016
@@ -1,7 +1,7 @@
-/* $NetBSD: npf_tableset.c,v 1.22 2014/08/11 01:54:12 rmind Exp $ */
+/* $NetBSD: npf_tableset.c,v 1.22.2.1 2016/12/18 07:40:50 snj Exp $ */
/*-
- * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This material is based upon work partially supported by The
@@ -41,7 +41,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.22 2014/08/11 01:54:12 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.22.2.1 2016/12/18 07:40:50 snj Exp $");
#include <sys/param.h>
#include <sys/types.h>
@@ -58,13 +58,12 @@ __KERNEL_RCSID(0, "$NetBSD: npf_tableset
#include <sys/types.h>
#include "npf_impl.h"
+#include "lpm.h"
typedef struct npf_tblent {
- union {
- LIST_ENTRY(npf_tblent) te_hashent;
- pt_node_t te_node;
- } /* C11 */;
- int te_alen;
+ LIST_ENTRY(npf_tblent) te_listent;
+ uint16_t te_preflen;
+ uint16_t te_alen;
npf_addr_t te_addr;
} npf_tblent_t;
@@ -81,7 +80,8 @@ struct npf_table {
u_long t_hashmask;
};
struct {
- pt_tree_t t_tree[2];
+ lpm_t * t_lpm;
+ LIST_HEAD(, npf_tblent) t_list;
};
struct {
void * t_blob;
@@ -294,7 +294,7 @@ table_hash_lookup(const npf_table_t *t,
* Lookup the hash table and check for duplicates.
* Note: mask is ignored for the hash storage.
*/
- LIST_FOREACH(ent, htbl, te_hashent) {
+ LIST_FOREACH(ent, htbl, te_listent) {
if (ent->te_alen != alen) {
continue;
}
@@ -307,27 +307,28 @@ table_hash_lookup(const npf_table_t *t,
}
static void
-table_hash_destroy(npf_table_t *t)
+table_hash_flush(npf_table_t *t)
{
for (unsigned n = 0; n <= t->t_hashmask; n++) {
npf_tblent_t *ent;
while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
- LIST_REMOVE(ent, te_hashent);
+ LIST_REMOVE(ent, te_listent);
pool_cache_put(tblent_cache, ent);
}
}
}
static void
-table_tree_destroy(pt_tree_t *tree)
+table_tree_flush(npf_table_t *t)
{
npf_tblent_t *ent;
- while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
- ptree_remove_node(tree, ent);
+ while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
+ LIST_REMOVE(ent, te_listent);
pool_cache_put(tblent_cache, ent);
}
+ lpm_clear(t->t_lpm, NULL, NULL);
}
/*
@@ -339,35 +340,27 @@ npf_table_create(const char *name, u_int
{
npf_table_t *t;
- t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
+ t = kmem_zalloc(sizeof(*t), KM_SLEEP);
strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
switch (type) {
case NPF_TABLE_TREE:
- ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
- (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
- offsetof(npf_tblent_t, te_node),
- offsetof(npf_tblent_t, te_addr));
- ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
- (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
- offsetof(npf_tblent_t, te_node),
- offsetof(npf_tblent_t, te_addr));
+ if ((t->t_lpm = lpm_create()) == NULL)
+ goto out;
+ LIST_INIT(&t->t_list);
break;
case NPF_TABLE_HASH:
t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask);
- if (t->t_hashl == NULL) {
- kmem_free(t, sizeof(npf_table_t));
- return NULL;
- }
+ if (t->t_hashl == NULL)
+ goto out;
break;
case NPF_TABLE_CDB:
t->t_blob = blob;
t->t_bsize = size;
t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
if (t->t_cdb == NULL) {
- kmem_free(t, sizeof(npf_table_t));
free(blob, M_TEMP);
- return NULL;
+ goto out;
}
t->t_nitems = cdbr_entries(t->t_cdb);
break;
@@ -379,6 +372,10 @@ npf_table_create(const char *name, u_int
t->t_id = tid;
return t;
+out:
+ kmem_free(t, sizeof(*t));
+ return NULL;
+
}
/*
@@ -391,12 +388,12 @@ npf_table_destroy(npf_table_t *t)
switch (t->t_type) {
case NPF_TABLE_HASH:
- table_hash_destroy(t);
+ table_hash_flush(t);
hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
break;
case NPF_TABLE_TREE:
- table_tree_destroy(&t->t_tree[0]);
- table_tree_destroy(&t->t_tree[1]);
+ table_tree_flush(t);
+ lpm_destroy(t->t_lpm);
break;
case NPF_TABLE_CDB:
cdbr_close(t->t_cdb);
@@ -406,7 +403,7 @@ npf_table_destroy(npf_table_t *t)
KASSERT(false);
}
rw_destroy(&t->t_lock);
- kmem_free(t, sizeof(npf_table_t));
+ kmem_free(t, sizeof(*t));
}
/*
@@ -494,7 +491,7 @@ npf_table_insert(npf_table_t *t, const i
break;
}
if (!table_hash_lookup(t, addr, alen, &htbl)) {
- LIST_INSERT_HEAD(htbl, ent, te_hashent);
+ LIST_INSERT_HEAD(htbl, ent, te_listent);
t->t_nitems++;
} else {
error = EEXIST;
@@ -502,16 +499,12 @@ npf_table_insert(npf_table_t *t, const i
break;
}
case NPF_TABLE_TREE: {
- pt_tree_t *tree = &t->t_tree[aidx];
- bool ok;
-
- /*
- * If no mask specified, use maximum mask.
- */
- ok = (mask != NPF_NO_NETMASK) ?
- ptree_insert_mask_node(tree, ent, mask) :
- ptree_insert_node(tree, ent);
- if (ok) {
+ const unsigned preflen =
+ (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
+ if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
+ lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
+ LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
+ ent->te_preflen = preflen;
t->t_nitems++;
error = 0;
} else {
@@ -556,17 +549,17 @@ npf_table_remove(npf_table_t *t, const i
ent = table_hash_lookup(t, addr, alen, &htbl);
if (__predict_true(ent != NULL)) {
- LIST_REMOVE(ent, te_hashent);
+ LIST_REMOVE(ent, te_listent);
t->t_nitems--;
}
break;
}
case NPF_TABLE_TREE: {
- pt_tree_t *tree = &t->t_tree[aidx];
-
- ent = ptree_find_node(tree, addr);
+ ent = lpm_lookup(t->t_lpm, addr, alen);
if (__predict_true(ent != NULL)) {
- ptree_remove_node(tree, ent);
+ LIST_REMOVE(ent, te_listent);
+ lpm_remove(t->t_lpm, &ent->te_addr,
+ ent->te_alen, ent->te_preflen);
t->t_nitems--;
}
break;
@@ -611,7 +604,7 @@ npf_table_lookup(npf_table_t *t, const i
break;
case NPF_TABLE_TREE:
rw_enter(&t->t_lock, RW_READER);
- found = ptree_find_node(&t->t_tree[aidx], addr) != NULL;
+ found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
rw_exit(&t->t_lock);
break;
case NPF_TABLE_CDB:
@@ -655,7 +648,7 @@ table_hash_list(const npf_table_t *t, vo
for (unsigned n = 0; n <= t->t_hashmask; n++) {
npf_tblent_t *ent;
- LIST_FOREACH(ent, &t->t_hashl[n], te_hashent) {
+ LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
error = table_ent_copyout(&ent->te_addr,
ent->te_alen, 0, ubuf, len, &off);
if (error)
@@ -666,20 +659,15 @@ table_hash_list(const npf_table_t *t, vo
}
static int
-table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
- size_t len, size_t *off)
+table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
{
- npf_tblent_t *ent = NULL;
+ npf_tblent_t *ent;
+ size_t off = 0;
int error = 0;
- while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
- pt_bitlen_t blen;
-
- if (!ptree_mask_node_p(tree, ent, &blen)) {
- blen = maxmask;
- }
- error = table_ent_copyout(&ent->te_addr, ent->te_alen,
- blen, ubuf, len, off);
+ LIST_FOREACH(ent, &t->t_list, te_listent) {
+ error = table_ent_copyout(&ent->te_addr,
+ ent->te_alen, 0, ubuf, len, &off);
if (error)
break;
}
@@ -710,7 +698,6 @@ table_cdb_list(npf_table_t *t, void *ubu
int
npf_table_list(npf_table_t *t, void *ubuf, size_t len)
{
- size_t off = 0;
int error = 0;
rw_enter(&t->t_lock, RW_READER);
@@ -719,10 +706,7 @@ npf_table_list(npf_table_t *t, void *ubu
error = table_hash_list(t, ubuf, len);
break;
case NPF_TABLE_TREE:
- error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
- if (error)
- break;
- error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
+ error = table_tree_list(t, ubuf, len);
break;
case NPF_TABLE_CDB:
error = table_cdb_list(t, ubuf, len);
@@ -746,12 +730,11 @@ npf_table_flush(npf_table_t *t)
rw_enter(&t->t_lock, RW_WRITER);
switch (t->t_type) {
case NPF_TABLE_HASH:
- table_hash_destroy(t);
+ table_hash_flush(t);
t->t_nitems = 0;
break;
case NPF_TABLE_TREE:
- table_tree_destroy(&t->t_tree[0]);
- table_tree_destroy(&t->t_tree[1]);
+ table_tree_flush(t);
t->t_nitems = 0;
break;
case NPF_TABLE_CDB:
Index: src/sys/rump/net/lib/libnpf/Makefile
diff -u src/sys/rump/net/lib/libnpf/Makefile:1.14 src/sys/rump/net/lib/libnpf/Makefile:1.14.2.1
--- src/sys/rump/net/lib/libnpf/Makefile:1.14 Sat Jul 19 18:24:16 2014
+++ src/sys/rump/net/lib/libnpf/Makefile Sun Dec 18 07:40:50 2016
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.14 2014/07/19 18:24:16 rmind Exp $
+# $NetBSD: Makefile,v 1.14.2.1 2016/12/18 07:40:50 snj Exp $
#
# Public Domain.
#
@@ -13,7 +13,7 @@ SRCS= npf.c npf_alg.c npf_conf.c npf_ctl
SRCS+= npf_bpf.c npf_if.c npf_inet.c npf_mbuf.c npf_nat.c
SRCS+= npf_ruleset.c npf_conn.c npf_conndb.c npf_rproc.c
SRCS+= npf_state.c npf_state_tcp.c npf_tableset.c
-SRCS+= npf_tableset_ptree.c npf_sendpkt.c npf_worker.c
+SRCS+= lpm.c npf_sendpkt.c npf_worker.c
SRCS+= if_npflog.c
Added files:
Index: src/sys/net/npf/lpm.c
diff -u /dev/null src/sys/net/npf/lpm.c:1.1.2.2
--- /dev/null Sun Dec 18 07:40:50 2016
+++ src/sys/net/npf/lpm.c Sun Dec 18 07:40:50 2016
@@ -0,0 +1,401 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * TODO: Simple linear scan for now (works just well with a few prefixes).
+ * TBD on a better algorithm.
+ */
+
+#if defined(_KERNEL)
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: lpm.c,v 1.1.2.2 2016/12/18 07:40:50 snj Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kmem.h>
+#else
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
+#define kmem_alloc(a, b) malloc(a)
+#define kmem_free(a, b) free(a)
+#define kmem_zalloc(a, b) calloc(a, 1)
+#endif
+
+#include "lpm.h"
+
+#define LPM_MAX_PREFIX (128)
+#define LPM_MAX_WORDS (LPM_MAX_PREFIX >> 5)
+#define LPM_TO_WORDS(x) ((x) >> 2)
+#define LPM_HASH_STEP (8)
+
+#ifdef DEBUG
+#define ASSERT assert
+#else
+#define ASSERT
+#endif
+
+typedef struct lpm_ent {
+ struct lpm_ent *next;
+ void * val;
+ unsigned len;
+ uint8_t key[];
+} lpm_ent_t;
+
+typedef struct {
+ uint32_t hashsize;
+ uint32_t nitems;
+ lpm_ent_t **bucket;
+} lpm_hmap_t;
+
+struct lpm {
+ uint32_t bitmask[LPM_MAX_WORDS];
+ void * defval;
+ lpm_hmap_t prefix[LPM_MAX_PREFIX + 1];
+};
+
+lpm_t *
+lpm_create(void)
+{
+ return kmem_zalloc(sizeof(lpm_t), KM_SLEEP);
+}
+
+void
+lpm_clear(lpm_t *lpm, lpm_dtor_t dtor, void *arg)
+{
+ for (unsigned n = 0; n <= LPM_MAX_PREFIX; n++) {
+ lpm_hmap_t *hmap = &lpm->prefix[n];
+
+ if (!hmap->hashsize) {
+ KASSERT(!hmap->bucket);
+ continue;
+ }
+ for (unsigned i = 0; i < hmap->hashsize; i++) {
+ lpm_ent_t *entry = hmap->bucket[i];
+
+ while (entry) {
+ lpm_ent_t *next = entry->next;
+
+ if (dtor) {
+ dtor(arg, entry->key,
+ entry->len, entry->val);
+ }
+ kmem_free(entry,
+ offsetof(lpm_ent_t, key[entry->len]));
+ entry = next;
+ }
+ }
+ kmem_free(hmap->bucket, hmap->hashsize);
+ hmap->bucket = NULL;
+ hmap->hashsize = 0;
+ hmap->nitems = 0;
+ }
+ memset(lpm->bitmask, 0, sizeof(lpm->bitmask));
+ lpm->defval = NULL;
+}
+
+void
+lpm_destroy(lpm_t *lpm)
+{
+ lpm_clear(lpm, NULL, NULL);
+ kmem_free(lpm, sizeof(*lpm));
+}
+
+/*
+ * fnv1a_hash: Fowler-Noll-Vo hash function (FNV-1a variant).
+ */
+static uint32_t
+fnv1a_hash(const void *buf, size_t len)
+{
+ uint32_t hash = 2166136261UL;
+ const uint8_t *p = buf;
+
+ while (len--) {
+ hash ^= *p++;
+ hash *= 16777619U;
+ }
+ return hash;
+}
+
+static bool
+hashmap_rehash(lpm_hmap_t *hmap, uint32_t size)
+{
+ lpm_ent_t **bucket;
+ uint32_t hashsize;
+
+ for (hashsize = 1; hashsize < size; hashsize <<= 1) {
+ continue;
+ }
+ bucket = kmem_zalloc(hashsize * sizeof(*bucket), KM_SLEEP);
+ if (bucket == NULL)
+ return false;
+ for (unsigned n = 0; n < hmap->hashsize; n++) {
+ lpm_ent_t *list = hmap->bucket[n];
+
+ while (list) {
+ lpm_ent_t *entry = list;
+ uint32_t hash = fnv1a_hash(entry->key, entry->len);
+ const size_t i = hash & (hashsize - 1);
+
+ list = entry->next;
+ entry->next = bucket[i];
+ bucket[i] = entry;
+ }
+ }
+ if (hmap->bucket)
+ kmem_free(hmap->bucket, hmap->hashsize);
+ hmap->bucket = bucket;
+ hmap->hashsize = hashsize;
+ return true;
+}
+
+static lpm_ent_t *
+hashmap_insert(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+ const uint32_t target = hmap->nitems + LPM_HASH_STEP;
+ const size_t entlen = offsetof(lpm_ent_t, key[len]);
+ uint32_t hash, i;
+ lpm_ent_t *entry;
+
+ if (hmap->hashsize < target && !hashmap_rehash(hmap, target)) {
+ return NULL;
+ }
+
+ hash = fnv1a_hash(key, len);
+ i = hash & (hmap->hashsize - 1);
+ entry = hmap->bucket[i];
+ while (entry) {
+ if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+ return entry;
+ }
+ entry = entry->next;
+ }
+
+ if ((entry = kmem_alloc(entlen, KM_SLEEP)) == NULL)
+ return NULL;
+
+ memcpy(entry->key, key, len);
+ entry->next = hmap->bucket[i];
+ entry->len = len;
+
+ hmap->bucket[i] = entry;
+ hmap->nitems++;
+ return entry;
+}
+
+static lpm_ent_t *
+hashmap_lookup(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+ const uint32_t hash = fnv1a_hash(key, len);
+ const uint32_t i = hash & (hmap->hashsize - 1);
+ lpm_ent_t *entry = hmap->bucket[i];
+
+ while (entry) {
+ if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+ return entry;
+ }
+ entry = entry->next;
+ }
+ return NULL;
+}
+
+static int
+hashmap_remove(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+ const uint32_t hash = fnv1a_hash(key, len);
+ const uint32_t i = hash & (hmap->hashsize - 1);
+ lpm_ent_t *prev = NULL, *entry = hmap->bucket[i];
+
+ while (entry) {
+ if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+ if (prev) {
+ prev->next = entry->next;
+ } else {
+ hmap->bucket[i] = entry->next;
+ }
+ free(entry, M_TEMP);
+ return 0;
+ }
+ prev = entry;
+ entry = entry->next;
+ }
+ return -1;
+}
+
+/*
+ * compute_prefix: given the address and prefix length, compute and
+ * return the address prefix.
+ */
+static inline void
+compute_prefix(const unsigned nwords, const uint32_t *addr,
+ unsigned preflen, uint32_t *prefix)
+{
+ uint32_t addr2[4];
+
+ if ((uintptr_t)addr & 3) {
+ /* Unaligned address: just copy for now. */
+ memcpy(addr2, addr, nwords * 4);
+ addr = addr2;
+ }
+ for (unsigned i = 0; i < nwords; i++) {
+ if (preflen == 0) {
+ prefix[i] = 0;
+ continue;
+ }
+ if (preflen < 32) {
+ uint32_t mask = htonl(0xffffffff << (32 - preflen));
+ prefix[i] = addr[i] & mask;
+ preflen = 0;
+ } else {
+ prefix[i] = addr[i];
+ preflen -= 32;
+ }
+ }
+}
+
+/*
+ * lpm_insert: insert the CIDR into the LPM table.
+ *
+ * => Returns zero on success and -1 on failure.
+ */
+int
+lpm_insert(lpm_t *lpm, const void *addr,
+ size_t len, unsigned preflen, void *val)
+{
+ const unsigned nwords = LPM_TO_WORDS(len);
+ uint32_t prefix[LPM_MAX_WORDS];
+ lpm_ent_t *entry;
+
+ if (preflen == 0) {
+ /* Default is a special case. */
+ lpm->defval = val;
+ return 0;
+ }
+ compute_prefix(nwords, addr, preflen, prefix);
+ entry = hashmap_insert(&lpm->prefix[preflen], prefix, len);
+ if (entry) {
+ const unsigned n = --preflen >> 5;
+ lpm->bitmask[n] |= 0x80000000U >> (preflen & 31);
+ entry->val = val;
+ return 0;
+ }
+ return -1;
+}
+
+/*
+ * lpm_remove: remove the specified prefix.
+ */
+int
+lpm_remove(lpm_t *lpm, const void *addr, size_t len, unsigned preflen)
+{
+ const unsigned nwords = LPM_TO_WORDS(len);
+ uint32_t prefix[LPM_MAX_WORDS];
+
+ if (preflen == 0) {
+ lpm->defval = NULL;
+ return 0;
+ }
+ compute_prefix(nwords, addr, preflen, prefix);
+ return hashmap_remove(&lpm->prefix[preflen], prefix, len);
+}
+
+/*
+ * lpm_lookup: find the longest matching prefix given the IP address.
+ *
+ * => Returns the associated value on success or NULL on failure.
+ */
+void *
+lpm_lookup(lpm_t *lpm, const void *addr, size_t len)
+{
+ const unsigned nwords = LPM_TO_WORDS(len);
+ unsigned i, n = nwords;
+ uint32_t prefix[LPM_MAX_WORDS];
+
+ while (n--) {
+ uint32_t bitmask = lpm->bitmask[n];
+
+ while ((i = ffs(bitmask)) != 0) {
+ const unsigned preflen = (32 * n) + (32 - --i);
+ lpm_hmap_t *hmap = &lpm->prefix[preflen];
+ lpm_ent_t *entry;
+
+ compute_prefix(nwords, addr, preflen, prefix);
+ entry = hashmap_lookup(hmap, prefix, len);
+ if (entry) {
+ return entry->val;
+ }
+ bitmask &= ~(1U << i);
+ }
+ }
+ return lpm->defval;
+}
+
+#if !defined(_KERNEL)
+/*
+ * lpm_strtobin: convert CIDR string to the binary IP address and mask.
+ *
+ * => The address will be in the network byte order.
+ * => Returns 0 on success or -1 on failure.
+ */
+int
+lpm_strtobin(const char *cidr, void *addr, size_t *len, unsigned *preflen)
+{
+ char *p, buf[INET6_ADDRSTRLEN];
+
+ strncpy(buf, cidr, sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ if ((p = strchr(buf, '/')) != NULL) {
+ const ptrdiff_t off = p - buf;
+ *preflen = atoi(&buf[off + 1]);
+ buf[off] = '\0';
+ } else {
+ *preflen = LPM_MAX_PREFIX;
+ }
+
+ if (inet_pton(AF_INET6, buf, addr) == 1) {
+ *len = 16;
+ return 0;
+ }
+ if (inet_pton(AF_INET, buf, addr) == 1) {
+ if (*preflen == LPM_MAX_PREFIX) {
+ *preflen = 32;
+ }
+ *len = 4;
+ return 0;
+ }
+ return -1;
+}
+#endif
Index: src/sys/net/npf/lpm.h
diff -u /dev/null src/sys/net/npf/lpm.h:1.1.2.2
--- /dev/null Sun Dec 18 07:40:50 2016
+++ src/sys/net/npf/lpm.h Sun Dec 18 07:40:50 2016
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LPM_H_
+#define _LPM_H_
+
+__BEGIN_DECLS
+
+typedef struct lpm lpm_t;
+typedef void (*lpm_dtor_t)(void *, const void *, size_t, void *);
+
+lpm_t * lpm_create(void);
+void lpm_destroy(lpm_t *);
+void lpm_clear(lpm_t *, lpm_dtor_t, void *);
+
+int lpm_insert(lpm_t *, const void *, size_t, unsigned, void *);
+int lpm_remove(lpm_t *, const void *, size_t, unsigned);
+void * lpm_lookup(lpm_t *, const void *, size_t);
+int lpm_strtobin(const char *, void *, size_t *, unsigned *);
+
+__END_DECLS
+
+#endif