Module Name:    src
Committed By:   christos
Date:           Fri Dec  9 02:40:38 UTC 2016

Modified Files:
        src/sys/net/npf: files.npf npf_impl.h npf_tableset.c
Added Files:
        src/sys/net/npf: lpm.c lpm.h
Removed Files:
        src/sys/net/npf: npf_tableset_ptree.c

Log Message:
This patches ditches the ptree(3) library, because it is broken (you
can get missing entries!).  Instead, as a temporary solution, we switch
to a simple linear scan of the hash tables for the longest-prefix-match
(lpm.c lpm.h) algorithm. In fact, with few unique prefixes in the set,
on modern hardware this simple algorithm is pretty fast anyway!


To generate a diff of this commit:
cvs rdiff -u -r1.17 -r1.18 src/sys/net/npf/files.npf
cvs rdiff -u -r0 -r1.1 src/sys/net/npf/lpm.c src/sys/net/npf/lpm.h
cvs rdiff -u -r1.61 -r1.62 src/sys/net/npf/npf_impl.h
cvs rdiff -u -r1.23 -r1.24 src/sys/net/npf/npf_tableset.c
cvs rdiff -u -r1.1 -r0 src/sys/net/npf/npf_tableset_ptree.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/net/npf/files.npf
diff -u src/sys/net/npf/files.npf:1.17 src/sys/net/npf/files.npf:1.18
--- src/sys/net/npf/files.npf:1.17	Sat Jul 19 14:24:16 2014
+++ src/sys/net/npf/files.npf	Thu Dec  8 21:40:38 2016
@@ -1,4 +1,4 @@
-# $NetBSD: files.npf,v 1.17 2014/07/19 18:24:16 rmind Exp $
+# $NetBSD: files.npf,v 1.18 2016/12/09 02:40:38 christos Exp $
 #
 # Public Domain.
 #
@@ -19,7 +19,6 @@ file	net/npf/npf_bpf.c			npf
 file	net/npf/npf_ruleset.c			npf
 file	net/npf/npf_rproc.c			npf
 file	net/npf/npf_tableset.c			npf
-file	net/npf/npf_tableset_ptree.c		npf
 file	net/npf/npf_if.c			npf
 file	net/npf/npf_inet.c			npf
 file	net/npf/npf_conn.c			npf
@@ -31,6 +30,9 @@ file	net/npf/npf_alg.c			npf
 file	net/npf/npf_sendpkt.c			npf
 file	net/npf/npf_worker.c			npf
 
+# LPM
+file	net/npf/lpm.c				npf
+
 # Built-in extensions.
 file	net/npf/npf_ext_log.c			npf
 file	net/npf/npf_ext_normalize.c		npf

Index: src/sys/net/npf/npf_impl.h
diff -u src/sys/net/npf/npf_impl.h:1.61 src/sys/net/npf/npf_impl.h:1.62
--- src/sys/net/npf/npf_impl.h:1.61	Sun Feb  1 19:31:39 2015
+++ src/sys/net/npf/npf_impl.h	Thu Dec  8 21:40:38 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: npf_impl.h,v 1.61 2015/02/02 00:31:39 rmind Exp $	*/
+/*	$NetBSD: npf_impl.h,v 1.62 2016/12/09 02:40:38 christos Exp $	*/
 
 /*-
  * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
@@ -49,7 +49,6 @@
 
 #include <sys/types.h>
 #include <sys/queue.h>
-#include <sys/ptree.h>
 
 #include <net/bpf.h>
 #include <net/bpfjit.h>
@@ -228,8 +227,6 @@ bool		npf_bpf_validate(const void *, siz
 void		npf_tableset_sysinit(void);
 void		npf_tableset_sysfini(void);
 
-extern const pt_tree_ops_t npf_table_ptree_ops;
-
 npf_tableset_t *npf_tableset_create(u_int);
 void		npf_tableset_destroy(npf_tableset_t *);
 int		npf_tableset_insert(npf_tableset_t *, npf_table_t *);

Index: src/sys/net/npf/npf_tableset.c
diff -u src/sys/net/npf/npf_tableset.c:1.23 src/sys/net/npf/npf_tableset.c:1.24
--- src/sys/net/npf/npf_tableset.c:1.23	Wed Apr 20 11:46:08 2016
+++ src/sys/net/npf/npf_tableset.c	Thu Dec  8 21:40:38 2016
@@ -1,7 +1,7 @@
-/*	$NetBSD: npf_tableset.c,v 1.23 2016/04/20 15:46:08 christos Exp $	*/
+/*	$NetBSD: npf_tableset.c,v 1.24 2016/12/09 02:40:38 christos Exp $	*/
 
 /*-
- * Copyright (c) 2009-2014 The NetBSD Foundation, Inc.
+ * Copyright (c) 2009-2016 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This material is based upon work partially supported by The
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.23 2016/04/20 15:46:08 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: npf_tableset.c,v 1.24 2016/12/09 02:40:38 christos Exp $");
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -58,13 +58,12 @@ __KERNEL_RCSID(0, "$NetBSD: npf_tableset
 #include <sys/types.h>
 
 #include "npf_impl.h"
+#include "lpm.h"
 
 typedef struct npf_tblent {
-	union {
-		LIST_ENTRY(npf_tblent) te_hashent;
-		pt_node_t	te_node;
-	} /* C11 */;
-	int			te_alen;
+	LIST_ENTRY(npf_tblent)	te_listent;
+	uint16_t		te_preflen;
+	uint16_t		te_alen;
 	npf_addr_t		te_addr;
 } npf_tblent_t;
 
@@ -81,7 +80,8 @@ struct npf_table {
 			u_long		t_hashmask;
 		};
 		struct {
-			pt_tree_t	t_tree[2];
+			lpm_t *		t_lpm;
+			LIST_HEAD(, npf_tblent) t_list;
 		};
 		struct {
 			void *		t_blob;
@@ -294,7 +294,7 @@ table_hash_lookup(const npf_table_t *t, 
 	 * Lookup the hash table and check for duplicates.
 	 * Note: mask is ignored for the hash storage.
 	 */
-	LIST_FOREACH(ent, htbl, te_hashent) {
+	LIST_FOREACH(ent, htbl, te_listent) {
 		if (ent->te_alen != alen) {
 			continue;
 		}
@@ -307,27 +307,28 @@ table_hash_lookup(const npf_table_t *t, 
 }
 
 static void
-table_hash_destroy(npf_table_t *t)
+table_hash_flush(npf_table_t *t)
 {
 	for (unsigned n = 0; n <= t->t_hashmask; n++) {
 		npf_tblent_t *ent;
 
 		while ((ent = LIST_FIRST(&t->t_hashl[n])) != NULL) {
-			LIST_REMOVE(ent, te_hashent);
+			LIST_REMOVE(ent, te_listent);
 			pool_cache_put(tblent_cache, ent);
 		}
 	}
 }
 
 static void
-table_tree_destroy(pt_tree_t *tree)
+table_tree_flush(npf_table_t *t)
 {
 	npf_tblent_t *ent;
 
-	while ((ent = ptree_iterate(tree, NULL, PT_ASCENDING)) != NULL) {
-		ptree_remove_node(tree, ent);
+	while ((ent = LIST_FIRST(&t->t_list)) != NULL) {
+		LIST_REMOVE(ent, te_listent);
 		pool_cache_put(tblent_cache, ent);
 	}
+	lpm_clear(t->t_lpm, NULL, NULL);
 }
 
 /*
@@ -339,35 +340,27 @@ npf_table_create(const char *name, u_int
 {
 	npf_table_t *t;
 
-	t = kmem_zalloc(sizeof(npf_table_t), KM_SLEEP);
+	t = kmem_zalloc(sizeof(*t), KM_SLEEP);
 	strlcpy(t->t_name, name, NPF_TABLE_MAXNAMELEN);
 
 	switch (type) {
 	case NPF_TABLE_TREE:
-		ptree_init(&t->t_tree[0], &npf_table_ptree_ops,
-		    (void *)(sizeof(struct in_addr) / sizeof(uint32_t)),
-		    offsetof(npf_tblent_t, te_node),
-		    offsetof(npf_tblent_t, te_addr));
-		ptree_init(&t->t_tree[1], &npf_table_ptree_ops,
-		    (void *)(sizeof(struct in6_addr) / sizeof(uint32_t)),
-		    offsetof(npf_tblent_t, te_node),
-		    offsetof(npf_tblent_t, te_addr));
+		if ((t->t_lpm = lpm_create()) == NULL)
+			goto out;
+		LIST_INIT(&t->t_list);
 		break;
 	case NPF_TABLE_HASH:
 		t->t_hashl = hashinit(1024, HASH_LIST, true, &t->t_hashmask);
-		if (t->t_hashl == NULL) {
-			kmem_free(t, sizeof(npf_table_t));
-			return NULL;
-		}
+		if (t->t_hashl == NULL)
+			goto out;
 		break;
 	case NPF_TABLE_CDB:
 		t->t_blob = blob;
 		t->t_bsize = size;
 		t->t_cdb = cdbr_open_mem(blob, size, CDBR_DEFAULT, NULL, NULL);
 		if (t->t_cdb == NULL) {
-			kmem_free(t, sizeof(npf_table_t));
 			free(blob, M_TEMP);
-			return NULL;
+			goto out;
 		}
 		t->t_nitems = cdbr_entries(t->t_cdb);
 		break;
@@ -379,6 +372,10 @@ npf_table_create(const char *name, u_int
 	t->t_id = tid;
 
 	return t;
+out:
+	kmem_free(t, sizeof(*t));
+	return NULL;
+	
 }
 
 /*
@@ -391,12 +388,12 @@ npf_table_destroy(npf_table_t *t)
 
 	switch (t->t_type) {
 	case NPF_TABLE_HASH:
-		table_hash_destroy(t);
+		table_hash_flush(t);
 		hashdone(t->t_hashl, HASH_LIST, t->t_hashmask);
 		break;
 	case NPF_TABLE_TREE:
-		table_tree_destroy(&t->t_tree[0]);
-		table_tree_destroy(&t->t_tree[1]);
+		table_tree_flush(t);
+		lpm_destroy(t->t_lpm);
 		break;
 	case NPF_TABLE_CDB:
 		cdbr_close(t->t_cdb);
@@ -406,7 +403,7 @@ npf_table_destroy(npf_table_t *t)
 		KASSERT(false);
 	}
 	rw_destroy(&t->t_lock);
-	kmem_free(t, sizeof(npf_table_t));
+	kmem_free(t, sizeof(*t));
 }
 
 /*
@@ -494,7 +491,7 @@ npf_table_insert(npf_table_t *t, const i
 			break;
 		}
 		if (!table_hash_lookup(t, addr, alen, &htbl)) {
-			LIST_INSERT_HEAD(htbl, ent, te_hashent);
+			LIST_INSERT_HEAD(htbl, ent, te_listent);
 			t->t_nitems++;
 		} else {
 			error = EEXIST;
@@ -502,16 +499,12 @@ npf_table_insert(npf_table_t *t, const i
 		break;
 	}
 	case NPF_TABLE_TREE: {
-		pt_tree_t *tree = &t->t_tree[aidx];
-		bool ok;
-
-		/*
-		 * If no mask specified, use maximum mask.
-		 */
-		ok = (mask != NPF_NO_NETMASK) ?
-		    ptree_insert_mask_node(tree, ent, mask) :
-		    ptree_insert_node(tree, ent);
-		if (ok) {
+		const unsigned preflen =
+		    (mask == NPF_NO_NETMASK) ? (alen * 8) : mask;
+		if (lpm_lookup(t->t_lpm, addr, alen) == NULL &&
+		    lpm_insert(t->t_lpm, addr, alen, preflen, ent) == 0) {
+			LIST_INSERT_HEAD(&t->t_list, ent, te_listent);
+			ent->te_preflen = preflen;
 			t->t_nitems++;
 			error = 0;
 		} else {
@@ -556,17 +549,17 @@ npf_table_remove(npf_table_t *t, const i
 
 		ent = table_hash_lookup(t, addr, alen, &htbl);
 		if (__predict_true(ent != NULL)) {
-			LIST_REMOVE(ent, te_hashent);
+			LIST_REMOVE(ent, te_listent);
 			t->t_nitems--;
 		}
 		break;
 	}
 	case NPF_TABLE_TREE: {
-		pt_tree_t *tree = &t->t_tree[aidx];
-
-		ent = ptree_find_node(tree, addr);
+		ent = lpm_lookup(t->t_lpm, addr, alen);
 		if (__predict_true(ent != NULL)) {
-			ptree_remove_node(tree, ent);
+			LIST_REMOVE(ent, te_listent);
+			lpm_remove(t->t_lpm, &ent->te_addr,
+			    ent->te_alen, ent->te_preflen);
 			t->t_nitems--;
 		}
 		break;
@@ -611,7 +604,7 @@ npf_table_lookup(npf_table_t *t, const i
 		break;
 	case NPF_TABLE_TREE:
 		rw_enter(&t->t_lock, RW_READER);
-		found = ptree_find_node(&t->t_tree[aidx], addr) != NULL;
+		found = lpm_lookup(t->t_lpm, addr, alen) != NULL;
 		rw_exit(&t->t_lock);
 		break;
 	case NPF_TABLE_CDB:
@@ -655,7 +648,7 @@ table_hash_list(const npf_table_t *t, vo
 	for (unsigned n = 0; n <= t->t_hashmask; n++) {
 		npf_tblent_t *ent;
 
-		LIST_FOREACH(ent, &t->t_hashl[n], te_hashent) {
+		LIST_FOREACH(ent, &t->t_hashl[n], te_listent) {
 			error = table_ent_copyout(&ent->te_addr,
 			    ent->te_alen, 0, ubuf, len, &off);
 			if (error)
@@ -666,20 +659,15 @@ table_hash_list(const npf_table_t *t, vo
 }
 
 static int
-table_tree_list(pt_tree_t *tree, npf_netmask_t maxmask, void *ubuf,
-    size_t len, size_t *off)
+table_tree_list(const npf_table_t *t, void *ubuf, size_t len)
 {
-	npf_tblent_t *ent = NULL;
+	npf_tblent_t *ent;
+	size_t off = 0;
 	int error = 0;
 
-	while ((ent = ptree_iterate(tree, ent, PT_ASCENDING)) != NULL) {
-		pt_bitlen_t blen;
-
-		if (!ptree_mask_node_p(tree, ent, &blen)) {
-			blen = maxmask;
-		}
-		error = table_ent_copyout(&ent->te_addr, ent->te_alen,
-		    blen, ubuf, len, off);
+	LIST_FOREACH(ent, &t->t_list, te_listent) {
+		error = table_ent_copyout(&ent->te_addr,
+		    ent->te_alen, 0, ubuf, len, &off);
 		if (error)
 			break;
 	}
@@ -710,7 +698,6 @@ table_cdb_list(npf_table_t *t, void *ubu
 int
 npf_table_list(npf_table_t *t, void *ubuf, size_t len)
 {
-	size_t off = 0;
 	int error = 0;
 
 	rw_enter(&t->t_lock, RW_READER);
@@ -719,10 +706,7 @@ npf_table_list(npf_table_t *t, void *ubu
 		error = table_hash_list(t, ubuf, len);
 		break;
 	case NPF_TABLE_TREE:
-		error = table_tree_list(&t->t_tree[0], 32, ubuf, len, &off);
-		if (error)
-			break;
-		error = table_tree_list(&t->t_tree[1], 128, ubuf, len, &off);
+		error = table_tree_list(t, ubuf, len);
 		break;
 	case NPF_TABLE_CDB:
 		error = table_cdb_list(t, ubuf, len);
@@ -746,12 +730,11 @@ npf_table_flush(npf_table_t *t)
 	rw_enter(&t->t_lock, RW_WRITER);
 	switch (t->t_type) {
 	case NPF_TABLE_HASH:
-		table_hash_destroy(t);
+		table_hash_flush(t);
 		t->t_nitems = 0;
 		break;
 	case NPF_TABLE_TREE:
-		table_tree_destroy(&t->t_tree[0]);
-		table_tree_destroy(&t->t_tree[1]);
+		table_tree_flush(t);
 		t->t_nitems = 0;
 		break;
 	case NPF_TABLE_CDB:

Added files:

Index: src/sys/net/npf/lpm.c
diff -u /dev/null src/sys/net/npf/lpm.c:1.1
--- /dev/null	Thu Dec  8 21:40:38 2016
+++ src/sys/net/npf/lpm.c	Thu Dec  8 21:40:38 2016
@@ -0,0 +1,401 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * TODO: Simple linear scan for now (works just well with a few prefixes).
+ * TBD on a better algorithm.
+ */
+
+#if defined(_KERNEL)
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: lpm.c,v 1.1 2016/12/09 02:40:38 christos Exp $");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/kmem.h>
+#else
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
+#define kmem_alloc(a, b) malloc(a)
+#define kmem_free(a, b) free(a)
+#define kmem_zalloc(a, b) calloc(a, 1)
+#endif
+
+#include "lpm.h"
+
+#define	LPM_MAX_PREFIX		(128)
+#define	LPM_MAX_WORDS		(LPM_MAX_PREFIX >> 5)
+#define	LPM_TO_WORDS(x)		((x) >> 2)
+#define	LPM_HASH_STEP		(8)
+
+#ifdef DEBUG
+#define	ASSERT	assert
+#else
+#define	ASSERT
+#endif
+
+typedef struct lpm_ent {
+	struct lpm_ent *next;
+	void *		val;
+	unsigned	len;
+	uint8_t		key[];
+} lpm_ent_t;
+
+typedef struct {
+	uint32_t	hashsize;
+	uint32_t	nitems;
+	lpm_ent_t **bucket;
+} lpm_hmap_t;
+
+struct lpm {
+	uint32_t	bitmask[LPM_MAX_WORDS];
+	void *		defval;
+	lpm_hmap_t	prefix[LPM_MAX_PREFIX + 1];
+};
+
+lpm_t *
+lpm_create(void)
+{
+	return kmem_zalloc(sizeof(lpm_t), KM_SLEEP);
+}
+
+void
+lpm_clear(lpm_t *lpm, lpm_dtor_t dtor, void *arg)
+{
+	for (unsigned n = 0; n <= LPM_MAX_PREFIX; n++) {
+		lpm_hmap_t *hmap = &lpm->prefix[n];
+
+		if (!hmap->hashsize) {
+			KASSERT(!hmap->bucket);
+			continue;
+		}
+		for (unsigned i = 0; i < hmap->hashsize; i++) {
+			lpm_ent_t *entry = hmap->bucket[i];
+
+			while (entry) {
+				lpm_ent_t *next = entry->next;
+
+				if (dtor) {
+					dtor(arg, entry->key,
+					    entry->len, entry->val);
+				}
+				kmem_free(entry, 
+				    offsetof(lpm_ent_t, key[entry->len]));
+				entry = next;
+			}
+		}
+		kmem_free(hmap->bucket, hmap->hashsize);
+		hmap->bucket = NULL;
+		hmap->hashsize = 0;
+		hmap->nitems = 0;
+	}
+	memset(lpm->bitmask, 0, sizeof(lpm->bitmask));
+	lpm->defval = NULL;
+}
+
+void
+lpm_destroy(lpm_t *lpm)
+{
+	lpm_clear(lpm, NULL, NULL);
+	kmem_free(lpm, sizeof(*lpm));
+}
+
+/*
+ * fnv1a_hash: Fowler-Noll-Vo hash function (FNV-1a variant).
+ */
+static uint32_t
+fnv1a_hash(const void *buf, size_t len)
+{
+	uint32_t hash = 2166136261UL;
+	const uint8_t *p = buf;
+
+	while (len--) {
+		hash ^= *p++;
+		hash *= 16777619U;
+	}
+	return hash;
+}
+
+static bool
+hashmap_rehash(lpm_hmap_t *hmap, uint32_t size)
+{
+	lpm_ent_t **bucket;
+	uint32_t hashsize;
+
+	for (hashsize = 1; hashsize < size; hashsize <<= 1) {
+		continue;
+	}
+	bucket = kmem_zalloc(hashsize * sizeof(*bucket), KM_SLEEP);
+	if (bucket == NULL)
+		return false;
+	for (unsigned n = 0; n < hmap->hashsize; n++) {
+		lpm_ent_t *list = hmap->bucket[n];
+
+		while (list) {
+			lpm_ent_t *entry = list;
+			uint32_t hash = fnv1a_hash(entry->key, entry->len);
+			const size_t i = hash & (hashsize - 1);
+
+			list = entry->next;
+			entry->next = bucket[i];
+			bucket[i] = entry;
+		}
+	}
+	if (hmap->bucket)
+		kmem_free(hmap->bucket, hmap->hashsize);
+	hmap->bucket = bucket;
+	hmap->hashsize = hashsize;
+	return true;
+}
+
+static lpm_ent_t *
+hashmap_insert(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+	const uint32_t target = hmap->nitems + LPM_HASH_STEP;
+	const size_t entlen = offsetof(lpm_ent_t, key[len]);
+	uint32_t hash, i;
+	lpm_ent_t *entry;
+
+	if (hmap->hashsize < target && !hashmap_rehash(hmap, target)) {
+		return NULL;
+	}
+
+	hash = fnv1a_hash(key, len);
+	i = hash & (hmap->hashsize - 1);
+	entry = hmap->bucket[i];
+	while (entry) {
+		if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+			return entry;
+		}
+		entry = entry->next;
+	}
+
+	if ((entry = kmem_alloc(entlen, KM_SLEEP)) == NULL)
+		return NULL;
+
+	memcpy(entry->key, key, len);
+	entry->next = hmap->bucket[i];
+	entry->len = len;
+
+	hmap->bucket[i] = entry;
+	hmap->nitems++;
+	return entry;
+}
+
+static lpm_ent_t *
+hashmap_lookup(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+	const uint32_t hash = fnv1a_hash(key, len);
+	const uint32_t i = hash & (hmap->hashsize - 1);
+	lpm_ent_t *entry = hmap->bucket[i];
+
+	while (entry) {
+		if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+			return entry;
+		}
+		entry = entry->next;
+	}
+	return NULL;
+}
+
+static int
+hashmap_remove(lpm_hmap_t *hmap, const void *key, size_t len)
+{
+	const uint32_t hash = fnv1a_hash(key, len);
+	const uint32_t i = hash & (hmap->hashsize - 1);
+	lpm_ent_t *prev = NULL, *entry = hmap->bucket[i];
+
+	while (entry) {
+		if (entry->len == len && memcmp(entry->key, key, len) == 0) {
+			if (prev) {
+				prev->next = entry->next;
+			} else {
+				hmap->bucket[i] = entry->next;
+			}
+			free(entry, M_TEMP);
+			return 0;
+		}
+		prev = entry;
+		entry = entry->next;
+	}
+	return -1;
+}
+
+/*
+ * compute_prefix: given the address and prefix length, compute and
+ * return the address prefix.
+ */
+static inline void
+compute_prefix(const unsigned nwords, const uint32_t *addr,
+    unsigned preflen, uint32_t *prefix)
+{
+	uint32_t addr2[4];
+
+	if ((uintptr_t)addr & 3) {
+		/* Unaligned address: just copy for now. */
+		memcpy(addr2, addr, nwords * 4);
+		addr = addr2;
+	}
+	for (unsigned i = 0; i < nwords; i++) {
+		if (preflen == 0) {
+			prefix[i] = 0;
+			continue;
+		}
+		if (preflen < 32) {
+			uint32_t mask = htonl(0xffffffff << (32 - preflen));
+			prefix[i] = addr[i] & mask;
+			preflen = 0;
+		} else {
+			prefix[i] = addr[i];
+			preflen -= 32;
+		}
+	}
+}
+
+/*
+ * lpm_insert: insert the CIDR into the LPM table.
+ *
+ * => Returns zero on success and -1 on failure.
+ */
+int
+lpm_insert(lpm_t *lpm, const void *addr,
+    size_t len, unsigned preflen, void *val)
+{
+	const unsigned nwords = LPM_TO_WORDS(len);
+	uint32_t prefix[LPM_MAX_WORDS];
+	lpm_ent_t *entry;
+
+	if (preflen == 0) {
+		/* Default is a special case. */
+		lpm->defval = val;
+		return 0;
+	}
+	compute_prefix(nwords, addr, preflen, prefix);
+	entry = hashmap_insert(&lpm->prefix[preflen], prefix, len);
+	if (entry) {
+		const unsigned n = --preflen >> 5;
+		lpm->bitmask[n] |= 0x80000000U >> (preflen & 31);
+		entry->val = val;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * lpm_remove: remove the specified prefix.
+ */
+int
+lpm_remove(lpm_t *lpm, const void *addr, size_t len, unsigned preflen)
+{
+	const unsigned nwords = LPM_TO_WORDS(len);
+	uint32_t prefix[LPM_MAX_WORDS];
+
+	if (preflen == 0) {
+		lpm->defval = NULL;
+		return 0;
+	}
+	compute_prefix(nwords, addr, preflen, prefix);
+	return hashmap_remove(&lpm->prefix[preflen], prefix, len);
+}
+
+/*
+ * lpm_lookup: find the longest matching prefix given the IP address.
+ *
+ * => Returns the associated value on success or NULL on failure.
+ */
+void *
+lpm_lookup(lpm_t *lpm, const void *addr, size_t len)
+{
+	const unsigned nwords = LPM_TO_WORDS(len);
+	unsigned i, n = nwords;
+	uint32_t prefix[LPM_MAX_WORDS];
+
+	while (n--) {
+		uint32_t bitmask = lpm->bitmask[n];
+
+		while ((i = ffs(bitmask)) != 0) {
+			const unsigned preflen = (32 * n) + (32 - --i);
+			lpm_hmap_t *hmap = &lpm->prefix[preflen];
+			lpm_ent_t *entry;
+
+			compute_prefix(nwords, addr, preflen, prefix);
+			entry = hashmap_lookup(hmap, prefix, len);
+			if (entry) {
+				return entry->val;
+			}
+			bitmask &= ~(1U << i);
+		}
+	}
+	return lpm->defval;
+}
+
+#if !defined(_KERNEL)
+/*
+ * lpm_strtobin: convert CIDR string to the binary IP address and mask.
+ *
+ * => The address will be in the network byte order.
+ * => Returns 0 on success or -1 on failure.
+ */
+int
+lpm_strtobin(const char *cidr, void *addr, size_t *len, unsigned *preflen)
+{
+	char *p, buf[INET6_ADDRSTRLEN];
+
+	strncpy(buf, cidr, sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+
+	if ((p = strchr(buf, '/')) != NULL) {
+		const ptrdiff_t off = p - buf;
+		*preflen = atoi(&buf[off + 1]);
+		buf[off] = '\0';
+	} else {
+		*preflen = LPM_MAX_PREFIX;
+	}
+
+	if (inet_pton(AF_INET6, buf, addr) == 1) {
+		*len = 16;
+		return 0;
+	}
+	if (inet_pton(AF_INET, buf, addr) == 1) {
+		if (*preflen == LPM_MAX_PREFIX) {
+			*preflen = 32;
+		}
+		*len = 4;
+		return 0;
+	}
+	return -1;
+}
+#endif
Index: src/sys/net/npf/lpm.h
diff -u /dev/null src/sys/net/npf/lpm.h:1.1
--- /dev/null	Thu Dec  8 21:40:38 2016
+++ src/sys/net/npf/lpm.h	Thu Dec  8 21:40:38 2016
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2016 Mindaugas Rasiukevicius <rmind at noxt eu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _LPM_H_
+#define _LPM_H_
+
+__BEGIN_DECLS
+
+typedef struct lpm lpm_t;
+typedef void (*lpm_dtor_t)(void *, const void *, size_t, void *);
+
+lpm_t *		lpm_create(void);
+void		lpm_destroy(lpm_t *);
+void		lpm_clear(lpm_t *, lpm_dtor_t, void *);
+
+int		lpm_insert(lpm_t *, const void *, size_t, unsigned, void *);
+int		lpm_remove(lpm_t *, const void *, size_t, unsigned);
+void *		lpm_lookup(lpm_t *, const void *, size_t);
+int		lpm_strtobin(const char *, void *, size_t *, unsigned *);
+
+__END_DECLS
+
+#endif

Reply via email to