Module Name: src Committed By: joerg Date: Sat Aug 15 16:21:05 UTC 2009
Modified Files: src/distrib/sets/lists/comp: mi src/usr.bin: Makefile Added Files: src/usr.bin/nbperf: Makefile graph2.c graph2.h graph3.c graph3.h nbperf-bdz.c nbperf-chm.c nbperf-chm3.c nbperf.1 nbperf.c nbperf.h Log Message: Add nbperf(1), a minimal perfect hash function generator. Implemented are the 3-graph BDZ algorithm as well as the 2-graph and 3-graph CHM algorithms. All algorithms have expected linear run time and the smallest functions need around 2.85 bit/key. To generate a diff of this commit: cvs rdiff -u -r1.1292 -r1.1293 src/distrib/sets/lists/comp/mi cvs rdiff -u -r1.177 -r1.178 src/usr.bin/Makefile cvs rdiff -u -r0 -r1.1 src/usr.bin/nbperf/Makefile \ src/usr.bin/nbperf/graph2.c src/usr.bin/nbperf/graph2.h \ src/usr.bin/nbperf/graph3.c src/usr.bin/nbperf/graph3.h \ src/usr.bin/nbperf/nbperf-bdz.c src/usr.bin/nbperf/nbperf-chm.c \ src/usr.bin/nbperf/nbperf-chm3.c src/usr.bin/nbperf/nbperf.1 \ src/usr.bin/nbperf/nbperf.c src/usr.bin/nbperf/nbperf.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/distrib/sets/lists/comp/mi diff -u src/distrib/sets/lists/comp/mi:1.1292 src/distrib/sets/lists/comp/mi:1.1293 --- src/distrib/sets/lists/comp/mi:1.1292 Sat Aug 15 09:52:57 2009 +++ src/distrib/sets/lists/comp/mi Sat Aug 15 16:21:04 2009 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.1292 2009/08/15 09:52:57 mbalmer Exp $ +# $NetBSD: mi,v 1.1293 2009/08/15 16:21:04 joerg Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. # @@ -57,6 +57,7 @@ ./usr/bin/msgmerge comp-c-bin ./usr/bin/msgunfmt comp-c-bin ./usr/bin/msguniq comp-c-bin +./usr/bin/nbperf comp-util-bin ./usr/bin/nm comp-util-bin bfd ./usr/bin/objcopy comp-util-bin bfd ./usr/bin/objdump comp-util-bin bfd @@ -2984,6 +2985,7 @@ ./usr/libdata/debug/usr/bin/msgs.debug comp-util-debug debug ./usr/libdata/debug/usr/bin/msgunfmt.debug comp-c-debug debug ./usr/libdata/debug/usr/bin/msguniq.debug comp-c-debug debug +./usr/libdata/debug/usr/bin/nbperf.debug comp-util-debug debug ./usr/libdata/debug/usr/bin/nbsvtool.debug comp-crypto-debug crypto,debug ./usr/libdata/debug/usr/bin/netgroup.debug comp-nis-debug debug ./usr/libdata/debug/usr/bin/netpgp.debug comp-crypto-debug crypto,debug @@ -3811,6 +3813,7 @@ ./usr/share/man/cat1/msg_table_add.0 comp-obsolete obsolete ./usr/share/man/cat1/msg_window.0 comp-obsolete obsolete ./usr/share/man/cat1/msgc.0 comp-c-catman .cat +./usr/share/man/cat1/nbperf.0 comp-util-catman .cat ./usr/share/man/cat1/nm.0 comp-util-catman bfd,.cat ./usr/share/man/cat1/objcopy.0 comp-util-catman bfd,.cat ./usr/share/man/cat1/objdump.0 comp-util-catman bfd,.cat @@ -9406,6 +9409,7 @@ ./usr/share/man/html1/menuc.html comp-c-htmlman html ./usr/share/man/html1/mkstr.html comp-c-htmlman html ./usr/share/man/html1/msgc.html comp-c-htmlman html +./usr/share/man/html1/nbperf.html comp-util-htmlman html ./usr/share/man/html1/nm.html comp-util-htmlman bfd,html ./usr/share/man/html1/objcopy.html comp-util-htmlman bfd,html ./usr/share/man/html1/objdump.html comp-util-htmlman bfd,html @@ -14765,6 +14769,7 @@ ./usr/share/man/man1/msg_table_add.1 comp-obsolete obsolete ./usr/share/man/man1/msg_window.1 comp-obsolete obsolete ./usr/share/man/man1/msgc.1 comp-c-man .man +./usr/share/man/man1/nbperf.1 comp-util-man .man ./usr/share/man/man1/nm.1 comp-util-man bfd,.man ./usr/share/man/man1/objcopy.1 comp-util-man bfd,.man ./usr/share/man/man1/objdump.1 comp-util-man bfd,.man Index: src/usr.bin/Makefile diff -u src/usr.bin/Makefile:1.177 src/usr.bin/Makefile:1.178 --- src/usr.bin/Makefile:1.177 Mon Jul 20 17:34:41 2009 +++ src/usr.bin/Makefile Sat Aug 15 16:21:04 2009 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.177 2009/07/20 17:34:41 christos Exp $ +# $NetBSD: Makefile,v 1.178 2009/08/15 16:21:04 joerg Exp $ # from: @(#)Makefile 8.3 (Berkeley) 1/7/94 .include <bsd.own.mk> @@ -17,7 +17,7 @@ lex locale locate lock logger login logname look lorder m4 \ machine mail make man menuc mesg midiplay mixerctl mkcsmapper \ mkdep mkesdb mkfifo mklocale mkstr mktemp moduli msgc msgs \ - netgroup netstat newgrp newsyslog nfsstat nice nl nohup nvi \ + nbperf netgroup netstat newgrp newsyslog nfsstat nice nl nohup nvi \ pagesize passwd paste patch pathchk pkill pmap pmc pr \ printenv printf progress pwhash qsubst quota radioctl rdist \ renice rev revoke rfcomm_sppd rlogin rpcgen rpcinfo rs rsh rup \ Added files: Index: src/usr.bin/nbperf/Makefile diff -u /dev/null src/usr.bin/nbperf/Makefile:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/Makefile Sat Aug 15 16:21:04 2009 @@ -0,0 +1,8 @@ +# $NetBSD: Makefile,v 1.1 2009/08/15 16:21:04 joerg Exp $ + +PROG= nbperf +SRCS= nbperf.c +SRCS+= nbperf-bdz.c nbperf-chm.c nbperf-chm3.c +SRCS+= graph2.c graph3.c + +.include <bsd.prog.mk> Index: src/usr.bin/nbperf/graph2.c diff -u /dev/null src/usr.bin/nbperf/graph2.c:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/graph2.c Sat Aug 15 16:21:04 2009 @@ -0,0 +1,172 @@ +/* $NetBSD: graph2.c,v 1.1 2009/08/15 16:21:04 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: graph2.c,v 1.1 2009/08/15 16:21:04 joerg Exp $"); + +#include <err.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> + +#include "nbperf.h" +#include "graph2.h" + +static const uint32_t unused = 0xffffffffU; + +void +graph2_setup(struct graph2 *graph, uint32_t v, uint32_t e) +{ + graph->v = v; + graph->e = e; + + graph->verts = calloc(sizeof(struct vertex2), v); + graph->edges = calloc(sizeof(struct edge2), e); + graph->output_order = calloc(sizeof(uint32_t), e); + + if (graph->verts == NULL || graph->edges == NULL || + graph->output_order == NULL) + err(1, "malloc failed"); +} + +void +graph2_free(struct graph2 *graph) +{ + free(graph->verts); + free(graph->edges); + free(graph->output_order); + + graph->verts = NULL; + graph->edges = NULL; + graph->output_order = NULL; +} + +int +graph2_hash(struct nbperf *nbperf, struct graph2 *graph) +{ + struct vertex2 *v; + uint32_t hashes[nbperf->hash_size]; + size_t i; + + for (i = 0; i < graph->e; ++i) { + (*nbperf->compute_hash)(nbperf, + nbperf->keys[i], nbperf->keylens[i], hashes); + graph->edges[i].left = hashes[0] % graph->v; + graph->edges[i].right = hashes[1] % graph->v; + if (graph->edges[i].left == graph->edges[i].right) + return -1; + } + + for (i = 0; i < graph->v; ++i) { + graph->verts[i].l_edge = unused; + graph->verts[i].r_edge = unused; + } + + for (i = 0; i < graph->e; ++i) { + v = &graph->verts[graph->edges[i].left]; + if (v->l_edge != unused) + graph->edges[v->l_edge].l_prev = i; + graph->edges[i].l_next = v->l_edge; + graph->edges[i].l_prev = unused; + v->l_edge = i; + + v = &graph->verts[graph->edges[i].right]; + if (v->r_edge != unused) + graph->edges[v->r_edge].r_prev = i; + graph->edges[i].r_next = v->r_edge; + graph->edges[i].r_prev = unused; + v->r_edge = i; + } + + return 0; +} + +static void +graph2_remove_vertex(struct graph2 *graph, struct vertex2 *v) +{ + struct edge2 *e; + struct vertex2 *v2; + + for (;;) { + if (v->l_edge != unused && v->r_edge != unused) + break; + if (v->l_edge == unused && v->r_edge == unused) + break; + + if (v->l_edge != unused) { + e = &graph->edges[v->l_edge]; + if (e->l_next != unused) + break; + v->l_edge = unused; /* No other elements possible! */ + v2 = &graph->verts[e->right]; + if (e->r_prev == unused) + v2->r_edge = e->r_next; + else + graph->edges[e->r_prev].r_next = e->r_next; + if (e->r_next != unused) + graph->edges[e->r_next].r_prev = e->r_prev; + v = v2; + } else { + e = &graph->edges[v->r_edge]; + if (e->r_next != unused) + break; + v->r_edge = unused; /* No other elements possible! */ + v2 = &graph->verts[e->left]; + if (e->l_prev == unused) + v2->l_edge = e->l_next; + else + graph->edges[e->l_prev].l_next = e->l_next; + if (e->l_next != unused) + graph->edges[e->l_next].l_prev = e->l_prev; + v = v2; + } + + graph->output_order[--graph->output_index] = e - graph->edges; + } +} + +int +graph2_output_order(struct graph2 *graph) +{ + size_t i; + + graph->output_index = graph->e; + + for (i = 0; i < graph->v; ++i) + graph2_remove_vertex(graph, &graph->verts[i]); + + if (graph->output_index != 0) + return -1; + + return 0; +} Index: src/usr.bin/nbperf/graph2.h diff -u /dev/null src/usr.bin/nbperf/graph2.h:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/graph2.h Sat Aug 15 16:21:05 2009 @@ -0,0 +1,63 @@ +/* $NetBSD: graph2.h,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Implementation of common 2-graph routines: + * - build a 2-graph with hash-pairs as edges + * - check a 2-graph for acyclicness and compute an output order + */ + +struct vertex2 { + uint32_t l_edge, r_edge; +}; + +struct edge2 { + uint32_t left, right; + uint32_t l_prev, l_next; + uint32_t r_prev, r_next; +}; + +struct graph2 { + struct vertex2 *verts; + struct edge2 *edges; + uint32_t output_index; + uint32_t *output_order; + uint8_t *visited; + uint32_t e, v; +}; + +void graph2_setup(struct graph2 *, uint32_t, uint32_t); +void graph2_free(struct graph2 *); + +int graph2_hash(struct nbperf *, struct graph2 *); +int graph2_output_order(struct graph2 *graph); Index: src/usr.bin/nbperf/graph3.c diff -u /dev/null src/usr.bin/nbperf/graph3.c:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/graph3.c Sat Aug 15 16:21:05 2009 @@ -0,0 +1,210 @@ +/* $NetBSD: graph3.c,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: graph3.c,v 1.1 2009/08/15 16:21:05 joerg Exp $"); + +#include <err.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> + +#include "nbperf.h" +#include "graph3.h" + +static const uint32_t unused = 0xffffffffU; + +void +graph3_setup(struct graph3 *graph, uint32_t v, uint32_t e) +{ + graph->v = v; + graph->e = e; + + graph->verts = calloc(sizeof(struct vertex3), v); + graph->edges = calloc(sizeof(struct edge3), e); + graph->output_order = calloc(sizeof(uint32_t), e); + + if (graph->verts == NULL || graph->edges == NULL || + graph->output_order == NULL) + err(1, "malloc failed"); +} + +void +graph3_free(struct graph3 *graph) +{ + free(graph->verts); + free(graph->edges); + free(graph->output_order); + + graph->verts = NULL; + graph->edges = NULL; + graph->output_order = NULL; +} + +int +graph3_hash(struct nbperf *nbperf, struct graph3 *graph) +{ + struct vertex3 *v; + uint32_t hashes[nbperf->hash_size]; + size_t i; + + for (i = 0; i < graph->e; ++i) { + (*nbperf->compute_hash)(nbperf, + nbperf->keys[i], nbperf->keylens[i], hashes); + graph->edges[i].left = hashes[0] % graph->v; + graph->edges[i].middle = hashes[1] % graph->v; + graph->edges[i].right = hashes[2] % graph->v; + if (graph->edges[i].left == graph->edges[i].middle) + return -1; + if (graph->edges[i].left == graph->edges[i].right) + return -1; + if (graph->edges[i].middle == graph->edges[i].right) + return -1; + } + + for (i = 0; i < graph->v; ++i) { + graph->verts[i].l_edge = unused; + graph->verts[i].m_edge = unused; + graph->verts[i].r_edge = unused; + } + + for (i = 0; i < graph->e; ++i) { + v = &graph->verts[graph->edges[i].left]; + if (v->l_edge != unused) + graph->edges[v->l_edge].l_prev = i; + graph->edges[i].l_next = v->l_edge; + graph->edges[i].l_prev = unused; + v->l_edge = i; + + v = &graph->verts[graph->edges[i].middle]; + if (v->m_edge != unused) + graph->edges[v->m_edge].m_prev = i; + graph->edges[i].m_next = v->m_edge; + graph->edges[i].m_prev = unused; + v->m_edge = i; + + v = &graph->verts[graph->edges[i].right]; + if (v->r_edge != unused) + graph->edges[v->r_edge].r_prev = i; + graph->edges[i].r_next = v->r_edge; + graph->edges[i].r_prev = unused; + v->r_edge = i; + } + + return 0; +} + +static void +graph3_remove_vertex(struct graph3 *graph, struct vertex3 *v) +{ + struct edge3 *e; + struct vertex3 *vl, *vm, *vr; + + if (v->l_edge != unused && v->m_edge != unused) + return; + if (v->l_edge != unused && v->r_edge != unused) + return; + if (v->m_edge != unused && v->r_edge != unused) + return; + if (v->l_edge == unused && v->m_edge == unused && v->r_edge == unused) + return; + + if (v->l_edge != unused) { + e = &graph->edges[v->l_edge]; + if (e->l_next != unused) + return; + } else if (v->m_edge != unused) { + e = &graph->edges[v->m_edge]; + if (e->m_next != unused) + return; + } else { + if (v->r_edge == unused) + abort(); + e = &graph->edges[v->r_edge]; + if (e->r_next != unused) + return; + } + + graph->output_order[--graph->output_index] = e - graph->edges; + + vl = &graph->verts[e->left]; + vm = &graph->verts[e->middle]; + vr = &graph->verts[e->right]; + + if (e->l_prev == unused) + vl->l_edge = e->l_next; + else + graph->edges[e->l_prev].l_next = e->l_next; + if (e->l_next != unused) + graph->edges[e->l_next].l_prev = e->l_prev; + + if (e->m_prev == unused) + vm->m_edge = e->m_next; + else + graph->edges[e->m_prev].m_next = e->m_next; + if (e->m_next != unused) + graph->edges[e->m_next].m_prev = e->m_prev; + + if (e->r_prev == unused) + vr->r_edge = e->r_next; + else + graph->edges[e->r_prev].r_next = e->r_next; + if (e->r_next != unused) + graph->edges[e->r_next].r_prev = e->r_prev; +} + +int +graph3_output_order(struct graph3 *graph) +{ + struct edge3 *e; + size_t i; + + graph->output_index = graph->e; + + for (i = 0; i < graph->v; ++i) + graph3_remove_vertex(graph, &graph->verts[i]); + + for (i = graph->e; i > 0 && i > graph->output_index;) { + --i; + e = &graph->edges[graph->output_order[i]]; + + graph3_remove_vertex(graph, &graph->verts[e->left]); + graph3_remove_vertex(graph, &graph->verts[e->middle]); + graph3_remove_vertex(graph, &graph->verts[e->right]); + } + + if (graph->output_index != 0) + return -1; + + return 0; +} Index: src/usr.bin/nbperf/graph3.h diff -u /dev/null src/usr.bin/nbperf/graph3.h:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/graph3.h Sat Aug 15 16:21:05 2009 @@ -0,0 +1,62 @@ +/* $NetBSD: graph3.h,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Implementation of common 3-graph routines: + * - build a 3-graph with hash-triple as edges + * - check a 3-graph for acyclicness and compute an output order + */ + +struct vertex3 { + uint32_t l_edge, m_edge, r_edge; +}; + +struct edge3 { + uint32_t left, middle, right; + uint32_t l_prev, m_prev, l_next; + uint32_t r_prev, m_next, r_next; +}; + +struct graph3 { + struct vertex3 *verts; + struct edge3 *edges; + uint32_t output_index; + uint32_t *output_order; + uint32_t e, v; +}; + +void graph3_setup(struct graph3 *, uint32_t, uint32_t); +void graph3_free(struct graph3 *); + +int graph3_hash(struct nbperf *, struct graph3 *); +int graph3_output_order(struct graph3 *); Index: src/usr.bin/nbperf/nbperf-bdz.c diff -u /dev/null src/usr.bin/nbperf/nbperf-bdz.c:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/nbperf-bdz.c Sat Aug 15 16:21:05 2009 @@ -0,0 +1,370 @@ +/* $NetBSD: nbperf-bdz.c,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: nbperf-bdz.c,v 1.1 2009/08/15 16:21:05 joerg Exp $"); + +#include <err.h> +#include <inttypes.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "nbperf.h" + +/* + * A full description of the algorithm can be found in: + * "Simple and Space-Efficient Minimal Perfect Hash Functions" + * by Botelho, Pagh and Ziviani, proceeedings of WADS 2007. + */ + +/* + * The algorithm is based on random, acyclic 3-graphs. + * + * Each edge in the represents a key. The vertices are the reminder of + * the hash function mod n. n = cm with c > 1.23. This ensures that + * can be found with a very high probality. + * + * An acyclic graph has an edge order, where at least one vertex of + * each edge hasn't been seen before. It is declares the first unvisited + * vertex as authoritive for the edge and assigns a 2bit value to unvisited + * vertices, so that the sum of all vertices of the edge modulo 4 is + * the index of the authoritive vertex. + */ + +#include "graph3.h" + +struct state { + struct graph3 graph; + uint32_t *visited; + uint32_t *holes64k; + uint16_t *holes256; + uint8_t *holes256_64; + uint8_t *holes256_128; + uint8_t *holes256_192; + uint8_t *g; + uint32_t *result_map; +}; + +static void +assign_nodes(struct state *state) +{ + struct edge3 *e; + size_t i, j; + uint32_t t, r, holes; + + for (i = 0; i < state->graph.v; ++i) + state->g[i] = 3; + + for (i = 0; i < state->graph.e; ++i) { + j = state->graph.output_order[i]; + e = &state->graph.edges[j]; + if (!state->visited[e->left]) { + r = 0; + t = e->left; + } else if (!state->visited[e->middle]) { + r = 1; + t = e->middle; + } else { + if (state->visited[e->right]) + abort(); + r = 2; + t = e->right; + } + + state->visited[t] = 2 + j; + if (state->visited[e->left] == 0) + state->visited[e->left] = 1; + if (state->visited[e->middle] == 0) + state->visited[e->middle] = 1; + if (state->visited[e->right] == 0) + state->visited[e->right] = 1; + + state->g[t] = (9 + r - state->g[e->left] - state->g[e->middle] + - state->g[e->right]) % 3; + } + + holes = 0; + for (i = 0; i < state->graph.v; ++i) { + if (i % 65536 == 0) + state->holes64k[i >> 16] = holes; + + if (i % 256 == 0) + state->holes256[i >> 8] = holes - state->holes64k[i >> 16]; + + if (i % 256 == 64) + state->holes256_64[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16]; + + if (i % 256 == 128) + state->holes256_128[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16]; + + if (i % 256 == 192) + state->holes256_192[i >> 8] = holes - state->holes256[i >> 8] - state->holes64k[i >> 16]; + + if (state->visited[i] > 1) { + j = state->visited[i] - 2; + state->result_map[j] = i - holes; + } + + if (state->g[i] == 3) + ++holes; + } + + if (i % 65536 != 0) + state->holes64k[(i >> 16) + 1] = holes; + + if (i % 256 != 0) + state->holes256[(i >> 8) + 1] = holes - state->holes64k[((i >> 8) + 1) >> 8]; + + if (i % 256 != 64) + state->holes256_64[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8]; + + if (i % 256 != 128) + state->holes256_128[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8]; + + if (i % 256 != 192) + state->holes256_192[(i >> 8) + 1] = holes - state->holes256[(i >> 8) + 1] - state->holes64k[((i >> 8) + 1) >> 8]; +} + +static void +print_hash(struct nbperf *nbperf, struct state *state) +{ + size_t i, j; + uint32_t sum; + + fprintf(nbperf->output, "#include <stdlib.h>\n"); + fprintf(nbperf->output, "#include <strings.h>\n\n"); + + fprintf(nbperf->output, "%suint32_t\n", + nbperf->static_hash ? "static " : ""); + fprintf(nbperf->output, + "%s(const void * __restrict key, size_t keylen)\n", + nbperf->hash_name); + fprintf(nbperf->output, "{\n"); + fprintf(nbperf->output, + "\tstatic const uint32_t g[%" PRId32 "] = {\n", + (state->graph.v + 15) / 16); + for (i = 0; i < state->graph.v; i += 16) { + for (j = 0, sum = 0; j < 16; ++j) + sum |= (uint32_t)state->g[i + j] << (2 * j); + + fprintf(nbperf->output, "%s0x%08" PRIx32 "ULL,%s", + (i / 16 % 4 == 0 ? "\t " : " "), + sum, + (i / 16 % 4 == 3 ? "\n" : "")); + } + fprintf(nbperf->output, "%s\t};\n", (i / 16 % 4 ? "\n" : "")); + + fprintf(nbperf->output, + "\tstatic const uint32_t holes64k[%" PRId32 "] = {\n", + (state->graph.v + 65535) / 65536); + for (i = 0; i < state->graph.v; i += 65536) + fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s", + (i / 65536 % 4 == 0 ? "\t " : " "), + state->holes64k[i >> 16], + (i / 65536 % 4 == 3 ? "\n" : "")); + fprintf(nbperf->output, "%s\t};\n", (i / 65536 % 4 ? "\n" : "")); + + fprintf(nbperf->output, + "\tstatic const uint16_t holes256[%" PRId32 "] = {\n", + (state->graph.v + 255) / 256); + for (i = 0; i < state->graph.v; i += 256) + fprintf(nbperf->output, "%s0x%04" PRIx32 ",%s", + (i / 256 % 4 == 0 ? "\t " : " "), + state->holes256[i >> 8], + (i / 256 % 4 == 3 ? "\n" : "")); + fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : "")); + + fprintf(nbperf->output, + "\tstatic const uint8_t holes256_64[%" PRId32 "] = {\n", + (state->graph.v + 255) / 256); + for (i = 64; i < state->graph.v; i += 256) + fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s", + (i / 256 % 4 == 0 ? "\t " : " "), + state->holes256_64[i >> 8], + (i / 256 % 4 == 3 ? "\n" : "")); + fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : "")); + + fprintf(nbperf->output, + "\tstatic const uint8_t holes256_128[%" PRId32 "] = {\n", + (state->graph.v + 255) / 256); + for (i = 128; i < state->graph.v; i += 256) + fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s", + (i / 256 % 4 == 0 ? "\t " : " "), + state->holes256_128[i >> 8], + (i / 256 % 4 == 3 ? "\n" : "")); + fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : "")); + + fprintf(nbperf->output, + "\tstatic const uint8_t holes256_192[%" PRId32 "] = {\n", + (state->graph.v + 255) / 256); + for (i = 192; i < state->graph.v; i += 256) + fprintf(nbperf->output, "%s0x%02" PRIx32 ",%s", + (i / 256 % 4 == 0 ? "\t " : " "), + state->holes256_192[i >> 8], + (i / 256 % 4 == 3 ? "\n" : "")); + fprintf(nbperf->output, "%s\t};\n", (i / 256 % 4 ? "\n" : "")); + + fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size); + fprintf(nbperf->output, "\tuint32_t m;\n"); + fprintf(nbperf->output, "\tuint32_t a1, a2, b1, b2, c1, c2, idx, idx2;\n\n"); + + (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h"); + + fprintf(nbperf->output, "\n\th[0] = h[0] %% %" PRIu32 ";\n", state->graph.v); + fprintf(nbperf->output, "\th[1] = h[1] %% %" PRIu32 ";\n", state->graph.v); + fprintf(nbperf->output, "\th[2] = h[2] %% %" PRIu32 ";\n", state->graph.v); + + fprintf(nbperf->output, "\n\ta1 = h[0] >> 4;\n"); + fprintf(nbperf->output, "\ta2 = 2 * (h[0] & 15);\n"); + fprintf(nbperf->output, "\tb1 = h[1] >> 4;\n"); + fprintf(nbperf->output, "\tb2 = 2 * (h[1] & 15);\n"); + fprintf(nbperf->output, "\tc1 = h[2] >> 4;\n"); + fprintf(nbperf->output, "\tc2 = 2 * (h[2] & 15);\n"); + + fprintf(nbperf->output, + "\tidx = h[(((g[a1] >> a2) & 3) + ((g[b1] >> b2) & 3) +\n" + "\t ((g[c1] >> c2) & 3)) %% 3];\n\n"); + + fprintf(nbperf->output, + "\tswitch ((idx >> 5) & 7) {\n" + "\tcase 0:\n" + "\t idx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8];\n" + "\t break;\n" + "\tcase 1: case 2:\n" + "\t idx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n" + "\t - holes256_64[idx >> 8];\n" + "\t break;\n" + "\tcase 3: case 4:\n" + "\t idx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n" + "\t - holes256_128[idx >> 8];\n" + "\t break;\n" + "\tcase 5: case 6:\n" + "\t idx2 = idx - holes64k[idx >> 16] - holes256[idx >> 8]\n" + "\t - holes256_192[idx >> 8];\n" + "\t break;\n" + "\tcase 7:\n" + "\t idx2 = idx - holes64k[(idx + 32) >> 16] -\n" + "\t holes256[(idx + 32) >> 8];\n" + "\t break;\n" + "\t}\n" + "\tswitch ((idx >> 4) & 3) {\n" + "\tcase 1:\n" + "\t m = (g[(idx >> 4) - 1] & (g[(idx >> 4) - 1] >> 1) & 0x55555555U);\n" + "\t idx2 -= popcount32(m);\n" + "\tcase 0:\n" + "\t m = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n" + "\t m &= ((2U << (2 * (idx & 15))) - 1);\n" + "\t idx2 -= popcount32(m);\n" + "\t break;\n" + "\tcase 2:\n" + "\t m = (g[(idx >> 4) + 1] & (g[(idx >> 4) + 1] >> 1) & 0x55555555U);\n" + "\t idx2 += popcount32(m);\n" + "\tcase 3:\n" + "\t m = (g[idx >> 4] & (g[idx >> 4] >> 1) & 0x55555555U);\n" + "\t m &= ~((2U << (2 * (idx & 15))) - 1);\n" + "\t idx2 += popcount32(m);\n" + "\t break;\n" + "\t}\n\n"); + + fprintf(nbperf->output, + "\treturn idx2;\n"); + fprintf(nbperf->output, "}\n"); + + if (nbperf->map_output != NULL) { + for (i = 0; i < state->graph.e; ++i) + fprintf(nbperf->map_output, "%" PRIu32 "\n", + state->result_map[i]); + } +} + +int +bdz_compute(struct nbperf *nbperf) +{ + struct state state; + int retval = -1; + uint32_t v, e; + + if (nbperf->c == 0) + nbperf->c = 1.24; + if (nbperf->c < 1.24) + errx(1, "The argument for option -c must be at least 1.24"); + if (nbperf->hash_size < 3) + errx(1, "The hash function must generate at least 3 values"); + + (*nbperf->seed_hash)(nbperf); + e = nbperf->n; + v = nbperf->c * nbperf->n; + if (1.24 * nbperf->n > v) + ++v; + if (v < 10) + v = 10; + + graph3_setup(&state.graph, v, e); + + state.holes64k = calloc(sizeof(uint32_t), (v + 65535) / 65536 + 1); + state.holes256 = calloc(sizeof(uint16_t), (v + 255) / 256 + 1); + state.holes256_64 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1); + state.holes256_128 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1); + state.holes256_192 = calloc(sizeof(uint8_t), (v + 255) / 256 + 1); + state.g = calloc(sizeof(uint32_t), v); + state.visited = calloc(sizeof(uint32_t), v); + state.result_map = calloc(sizeof(uint32_t), e); + + if (state.holes64k == NULL || state.holes256 == NULL || + state.holes256_64 == NULL || state.holes256_128 == NULL || + state.holes256_192 == NULL || state.g == NULL || + state.visited == NULL || state.result_map == NULL) + err(1, "malloc failed"); + + if (graph3_hash(nbperf, &state.graph)) + goto failed; + if (graph3_output_order(&state.graph)) + goto failed; + assign_nodes(&state); + print_hash(nbperf, &state); + + retval = 0; + +failed: + graph3_free(&state.graph); + free(state.visited); + free(state.g); + free(state.holes64k); + free(state.holes256); + free(state.holes256_64); + free(state.holes256_128); + free(state.holes256_192); + free(state.result_map); + return retval; +} Index: src/usr.bin/nbperf/nbperf-chm.c diff -u /dev/null src/usr.bin/nbperf/nbperf-chm.c:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/nbperf-chm.c Sat Aug 15 16:21:05 2009 @@ -0,0 +1,261 @@ +/* $NetBSD: nbperf-chm.c,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: nbperf-chm.c,v 1.1 2009/08/15 16:21:05 joerg Exp $"); + +#include <err.h> +#include <inttypes.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "nbperf.h" + +#ifdef BUILD_CHM3 +#include "graph3.h" +#else +#include "graph2.h" +#endif + +/* + * A full description of the algorithm can be found in: + * "An optimal algorithm for generating minimal perfect hash functions" + * by Czech, Havas and Majewski in Information Processing Letters, + * 43(5):256-264, October 1992. + */ + +/* + * The algorithm is based on random, acyclic graphs. + * + * Each edge in the represents a key. The vertices are the reminder of + * the hash function mod n. n = cm with c > 2, otherwise the propability + * of finding an acyclic graph is very low (for 2-graphs). The constant + * for 3-graphs is 1.24. + * + * After the hashing phase, the graph is checked for cycles. + * A cycle-free graph is either empty or has a vertex of degree 1. + * Removing the edge for this vertex doesn't change this property, + * so applying this recursively reduces the size of the graph. + * If the graph is empty at the end of the process, it was acyclic. + * + * The assignment step now sets g[i] := 0 and processes the edges + * in reverse order of removal. That ensures that at least one vertex + * is always unvisited and can be assigned. + */ + +struct state { +#ifdef BUILD_CHM3 + struct graph3 graph; +#else + struct graph2 graph; +#endif + uint32_t *g; + uint8_t *visited; +}; + +static void +assign_nodes(struct state *state) +{ +#ifdef BUILD_CHM3 + struct edge3 *e; +#else + struct edge2 *e; +#endif + size_t i; + uint32_t e_idx; + + for (i = 0; i < state->graph.e; ++i) { + e_idx = state->graph.output_order[i]; + e = &state->graph.edges[e_idx]; + +#ifdef BUILD_CHM3 + if (!state->visited[e->left]) { + state->g[e->left] = (2 * state->graph.e + e_idx + - state->g[e->middle] - state->g[e->right]) + % state->graph.e; + } else if (!state->visited[e->middle]) { + state->g[e->middle] = (2 * state->graph.e + e_idx + - state->g[e->left] - state->g[e->right]) + % state->graph.e; + } else { + state->g[e->right] = (2 * state->graph.e + e_idx + - state->g[e->left] - state->g[e->middle]) + % state->graph.e; + } + state->visited[e->left] = 1; + state->visited[e->middle] = 1; + state->visited[e->right] = 1; +#else + if (!state->visited[e->left]) { + state->g[e->left] = (state->graph.e + e_idx + - state->g[e->right]) % state->graph.e; + } else { + state->g[e->right] = (state->graph.e + e_idx + - state->g[e->left]) % state->graph.e; + } + state->visited[e->left] = 1; + state->visited[e->right] = 1; +#endif + } +} + +static void +print_hash(struct nbperf *nbperf, struct state *state) +{ + uint32_t i; + const char *g_type; + + fprintf(nbperf->output, "#include <stdlib.h>\n\n"); + + fprintf(nbperf->output, "%suint32_t\n", + nbperf->static_hash ? "static " : ""); + fprintf(nbperf->output, + "%s(const void * __restrict key, size_t keylen)\n", + nbperf->hash_name); + fprintf(nbperf->output, "{\n"); + if (state->graph.v >= 65536) + g_type = "uint32_t"; + else if (state->graph.v >= 256) + g_type = "uint16_t"; + else + g_type = "uint8_t"; + fprintf(nbperf->output, "\tstatic const %s g[%" PRId32 "] = {\n", + g_type, state->graph.v); + for (i = 0; i < state->graph.v; ++i) { + fprintf(nbperf->output, "%s0x%08" PRIx32 ",%s", + (i % 4 == 0 ? "\t " : " "), + state->g[i], + (i % 4 == 3 ? "\n" : "")); + } + if (i / 16 % 4 == 3) + fprintf(nbperf->output, "\n\t};\n"); + else + fprintf(nbperf->output, "\t};\n"); + fprintf(nbperf->output, "\tuint32_t h[%zu];\n\n", nbperf->hash_size); + (*nbperf->print_hash)(nbperf, "\t", "key", "keylen", "h"); +#ifdef BUILD_CHM3 + fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " + "g[h[1] %% %" PRIu32 "] + " + "g[h[2] %% %" PRIu32"]) %% %" PRIu32 ";\n", + state->graph.v, state->graph.v, state->graph.v, state->graph.e); +#else + fprintf(nbperf->output, "\treturn (g[h[0] %% %" PRIu32 "] + " + "g[h[1] %% %" PRIu32"]) %% %" PRIu32 ";\n", + state->graph.v, state->graph.v, state->graph.e); +#endif + fprintf(nbperf->output, "}\n"); + + if (nbperf->map_output != NULL) { + for (i = 0; i < state->graph.e; ++i) + fprintf(nbperf->map_output, "%" PRIu32 "\n", i); + } +} + +int +#ifdef BUILD_CHM3 +chm3_compute(struct nbperf *nbperf) +#else +chm_compute(struct nbperf *nbperf) +#endif +{ + struct state state; + int retval = -1; + uint32_t v, e; + +#ifdef BUILD_CHM3 + if (nbperf->c == 0) + nbperf-> c = 1.24; + + if (nbperf->c < 1.24) + errx(1, "The argument for option -c must be at least 1.24"); + + if (nbperf->hash_size < 3) + errx(1, "The hash function must generate at least 3 values"); +#else + if (nbperf->c == 0) + nbperf-> c = 2; + + if (nbperf->c < 2) + errx(1, "The argument for option -c must be at least 2"); + + if (nbperf->hash_size < 2) + errx(1, "The hash function must generate at least 2 values"); +#endif + + (*nbperf->seed_hash)(nbperf); + e = nbperf->n; + v = nbperf->c * nbperf->n; +#ifdef BUILD_CHM3 + if (v == 1.24 * nbperf->n) + ++v; + if (v < 10) + v = 10; +#else + if (v == 2 * nbperf->n) + ++v; +#endif + + state.g = calloc(sizeof(uint32_t), v); + state.visited = calloc(sizeof(uint8_t), v); + if (state.g == NULL || state.visited == NULL) + err(1, "malloc failed"); + +#ifdef BUILD_CHM3 + graph3_setup(&state.graph, v, e); + if (graph3_hash(nbperf, &state.graph)) + goto failed; + if (graph3_output_order(&state.graph)) + goto failed; +#else + graph2_setup(&state.graph, v, e); + if (graph2_hash(nbperf, &state.graph)) + goto failed; + if (graph2_output_order(&state.graph)) + goto failed; +#endif + assign_nodes(&state); + print_hash(nbperf, &state); + + retval = 0; + +failed: +#ifdef BUILD_CHM3 + graph3_free(&state.graph); +#else + graph2_free(&state.graph); +#endif + free(state.g); + free(state.visited); + return retval; +} Index: src/usr.bin/nbperf/nbperf-chm3.c diff -u /dev/null src/usr.bin/nbperf/nbperf-chm3.c:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/nbperf-chm3.c Sat Aug 15 16:21:05 2009 @@ -0,0 +1,4 @@ +/* $NetBSD: nbperf-chm3.c,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ + +#define BUILD_CHM3 +#include "nbperf-chm.c" Index: src/usr.bin/nbperf/nbperf.1 diff -u /dev/null src/usr.bin/nbperf/nbperf.1:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/nbperf.1 Sat Aug 15 16:21:05 2009 @@ -0,0 +1,130 @@ +.\" $NetBSD: nbperf.1,v 1.1 2009/08/15 16:21:05 joerg Exp $ +.\" +.\" Copyright (c) 2009 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Joerg Sonnenberger. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd July 13, 2009 +.Dt NBPERF 1 +.Os +.Sh NAME +.Nm nbperf +.Nd compute a perfect hash function +.Sh SYNOPSIS +.Nm +.Op Fl s +.Op Fl a Ar algorithm +.Op Fl c Ar utilisation +.Op Fl h Ar hash +.Op Fl i Ar iterations +.Op Fl m Ar map-file +.Op Fl n Ar name +.Op Fl o Ar output +.Op Ar input +.Sh DESCRIPTION +.Nm +reads a number of keys one per line from standard input or +.Ar input . +It computes an order-preserving minimal perfect hash function and writes +it to stdout or +.Ar output . +.Pp +The +.Fl m +argument instructs +.Nm +to write the resulting key mapping to +.Ar map-file . +Each line gives the result of the hash function for the corresponding input +key. +.Pp +The parameter +.Ar utilisation +determines the space efficiency. +.Pp +Supported arguments for +.Fl a : +.Bl -tag -width "chm" +.It Sy chm +This results in an order preserving minimal perfect hash function. +The +.Ar utilisation +must be at least 2, the default. +The number of iterations needed grows if the utilisation is very near to 2. +.It Sy chm3 +Similar to +.Ar chm . +The resulting hash function needs three instead of two table lookups when +compared to +.Ar chm . +The +.Ar utilisation +must be at least 1.24, the default. +This makes the output for +.Ar chm3 +noticable smaller than the output for +.Ar chm . +.It Sy bdz +This results in a non-order preserving minimal perfect hash function. +Output size is approximately 2.85 bit per key for the default value of +.Ar utilisation , +1.24. +This is also the smallest supported value. +.El +.Pp +Supported arguments for +.Fl h : +.Bl -tag -width "mi_vector_hash" +.It Sy mi_vector_hash +Platform-independent version of Jenkins parallel hash. +See +.Xr mi_vector_hash 3 . +.El +.Pp +The number of iterations can be limited with +.Fl i . +.Nm +outputs a function matching +.Ft uint32_t +.Fn hash "const void * restrict" "size_t" +to stdout. +The function expects the key length as second argument, for strings not +including the terminating NUL. +It is the responsibility of the caller to pass in only valid keys or compare +the resulting index to the key. +The function name can be changed using +.Fl n Ar name . +If the +.Fl s +flag is specified, it will be static. +.Pp +After each failing iteration, a dot is written to stderr. +.Sh EXIT STATUS +.Ex -std +.Sh SEE ALSO +.Xr mi_vector_hash 3 +.Sh AUTHORS +.An J\(:org Sonnenberger Index: src/usr.bin/nbperf/nbperf.c diff -u /dev/null src/usr.bin/nbperf/nbperf.c:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/nbperf.c Sat Aug 15 16:21:05 2009 @@ -0,0 +1,231 @@ +/* $NetBSD: nbperf.c,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: nbperf.c,v 1.1 2009/08/15 16:21:05 joerg Exp $"); + +#include <sys/endian.h> +#include <err.h> +#include <errno.h> +#include <inttypes.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "nbperf.h" + +static __dead +void usage(void) +{ + fprintf(stderr, + "%s [-s] [-c utilisation] [-i iterations] [-n name] " + "[-o output] input\n", + getprogname()); + exit(1); +} + +static void +mi_vector_hash_seed_hash(struct nbperf *nbperf) +{ + nbperf->seed[0] = arc4random(); +} + +static void +mi_vector_hash_compute(struct nbperf *nbperf, const void *key, size_t keylen, + uint32_t *hashes) +{ + mi_vector_hash(key, keylen, nbperf->seed[0], hashes); +} + +static void +mi_vector_hash_print_hash(struct nbperf *nbperf, const char *indent, + const char *key, const char *keylen, const char *hash) +{ + fprintf(nbperf->output, + "%smi_vector_hash(%s, %s, 0x%08" PRIx32 "U, %s);\n", + indent, key, keylen, nbperf->seed[0], hash); +} + +static void +set_hash(struct nbperf *nbperf, const char *arg) +{ + if (strcmp(arg, "mi_vector_hash") == 0) { + nbperf->hash_size = 3; + nbperf->seed_hash = mi_vector_hash_seed_hash; + nbperf->compute_hash = mi_vector_hash_compute; + nbperf->print_hash = mi_vector_hash_print_hash; + return; + } + errx(1, "Unknown hash function: %s", arg); +} + +int +main(int argc, char **argv) +{ + struct nbperf nbperf = { + .c = 0, + .hash_name = "hash", + .map_output = NULL, + .output = NULL, + .static_hash = 0, + }; + FILE *input; + size_t curlen = 0, curalloc = 0; + char *line, *eos; + size_t line_len; + const void **keys = NULL; + size_t *keylens = NULL; + uint32_t max_iterations = 0xffffffU; + long long tmp; + int looped, ch; + int (*build_hash)(struct nbperf *) = chm_compute; + + set_hash(&nbperf, "mi_vector_hash"); + + while ((ch = getopt(argc, argv, "a:c:h:i:m:n:o:s")) != -1) { + switch (ch) { + case 'a': + if (strcmp(optarg, "chm") == 0) + build_hash = chm_compute; + else if (strcmp(optarg, "chm3") == 0) + build_hash = chm3_compute; + else if (strcmp(optarg, "bdz") == 0) + build_hash = bdz_compute; + else + errx(1, "Unsupport algorithm: %s", optarg); + break; + case 'c': + errno = 0; + nbperf.c = strtod(optarg, &eos); + if (errno || eos[0] || !nbperf.c) + errx(2, "Invalid argument for -c"); + break; + case 'h': + set_hash(&nbperf, optarg); + break; + case 'i': + errno = 0; + tmp = strtoll(optarg, &eos, 0); + if (errno || eos == optarg || eos[0] || + tmp < 0 || tmp > 0xffffffffU) + errx(2, "Iteration count must be " + "a 32bit integer"); + max_iterations = (uint32_t)tmp; + break; + case 'm': + if (nbperf.map_output) + fclose(nbperf.map_output); + nbperf.map_output = fopen(optarg, "w"); + if (nbperf.map_output == NULL) + err(2, "cannot open map file"); + break; + case 'n': + nbperf.hash_name = optarg; + break; + case 'o': + if (nbperf.output) + fclose(nbperf.output); + nbperf.output = fopen(optarg, "w"); + if (nbperf.output == NULL) + err(2, "cannot open output file"); + break; + case 's': + nbperf.static_hash = 1; + break; + default: + usage(); + } + } + + argc -= optind; + argv += optind; + + if (argc > 1) + usage(); + + if (argc == 1) { + input = fopen(argv[0], "r"); + if (input == NULL) + err(1, "can't open input file"); + } else + input = stdin; + + if (nbperf.output == NULL) + nbperf.output = stdout; + + while ((line = fgetln(input, &line_len)) != NULL) { + if (line_len && line[line_len - 1] == '\n') + --line_len; + if (curlen == curalloc) { + if (curalloc < 256) + curalloc = 256; + else + curalloc += curalloc; + keys = realloc(keys, curalloc * sizeof(*keys)); + if (keys == NULL) + err(1, "realloc failed"); + keylens = realloc(keylens, + curalloc * sizeof(*keylens)); + if (keylens == NULL) + err(1, "realloc failed"); + } + if ((keys[curlen] = strndup(line, line_len)) == NULL) + err(1, "malloc failed"); + keylens[curlen] = line_len; + ++curlen; + } + + if (input != stdin) + fclose(input); + + nbperf.n = curlen; + nbperf.keys = keys; + nbperf.keylens = keylens; + + looped = 0; + while ((*build_hash)(&nbperf)) { + fputc('.', stderr); + looped = 1; + if (max_iterations == 0xffffffffU) + continue; + if (--max_iterations == 0) { + fputc('\n', stderr); + errx(1, "Iteration count reached"); + } + } + if (looped) + fputc('\n', stderr); + + return 0; +} Index: src/usr.bin/nbperf/nbperf.h diff -u /dev/null src/usr.bin/nbperf/nbperf.h:1.1 --- /dev/null Sat Aug 15 16:21:05 2009 +++ src/usr.bin/nbperf/nbperf.h Sat Aug 15 16:21:05 2009 @@ -0,0 +1,56 @@ +/* $NetBSD: nbperf.h,v 1.1 2009/08/15 16:21:05 joerg Exp $ */ +/*- + * Copyright (c) 2009 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Joerg Sonnenberger. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +struct nbperf { + FILE *output; + FILE *map_output; + const char *hash_name; + int static_hash; + size_t n; + const void * __restrict * keys; + const size_t *keylens; + + double c; + + size_t hash_size; + void (*seed_hash)(struct nbperf *); + void (*print_hash)(struct nbperf *, const char *, const char *, const char *, + const char *); + void (*compute_hash)(struct nbperf *, const void *, size_t, + uint32_t *); + uint32_t seed[1]; +}; + +int chm_compute(struct nbperf *); +int chm3_compute(struct nbperf *); +int bdz_compute(struct nbperf *);