sorry to reply this late. 
Ru Jia recently has been very busy and I happened to miss this email and still 
am wondering 
why nobody review this. 

Now, we submit the v4. 


> 在 2016年6月3日,上午2:30,Maxim Uvarov <[email protected]> 写道:
> 
> Just status to be clear here. Bill wanted to review/test this patch but fail 
> to apply it.
> Please update to current master and send v4.
> 
> Maxim.
> 
> On 06/02/16 06:25, HePeng wrote:
>> Ping.
>> 
>>> 在 2016年5月23日,下午12:26,Ru Jia <[email protected]> 写道:
>>> 
>>> This is an implementation of the 16,8,8 ip lookup
>>> (longest prefix matching) algorithm. The key of the
>>> table is 32-bit IPv4 address.
>>> 
>>> Signed-off-by: Ru Jia <[email protected]>
>>> ---
>>> helper/Makefile.am          |   4 +-
>>> helper/iplookuptable.c      | 974 
>>> ++++++++++++++++++++++++++++++++++++++++++++
>>> helper/odph_iplookuptable.h |  58 +++
>>> 3 files changed, 1035 insertions(+), 1 deletion(-)
>>> create mode 100644 helper/iplookuptable.c
>>> create mode 100644 helper/odph_iplookuptable.h
>>> 
>>> diff --git a/helper/Makefile.am b/helper/Makefile.am
>>> index aa58e8c..2e1d519 100644
>>> --- a/helper/Makefile.am
>>> +++ b/helper/Makefile.am
>>> @@ -27,6 +27,7 @@ noinst_HEADERS = \
>>>              $(srcdir)/odph_debug.h \
>>>              $(srcdir)/odph_hashtable.h \
>>>              $(srcdir)/odph_lineartable.h \
>>> +            $(srcdir)/odph_iplookuptable.h \
>>>              $(srcdir)/odph_list_internal.h
>>> 
>>> __LIB__libodphelper_linux_la_SOURCES = \
>>> @@ -35,6 +36,7 @@ __LIB__libodphelper_linux_la_SOURCES = \
>>>                                     chksum.c \
>>>                                     linux.c \
>>>                                     hashtable.c \
>>> -                                   lineartable.c
>>> +                                   lineartable.c \
>>> +                                   iplookuptable.c
>>> 
>>> lib_LTLIBRARIES = $(LIB)/libodphelper-linux.la
>>> diff --git a/helper/iplookuptable.c b/helper/iplookuptable.c
>>> new file mode 100644
>>> index 0000000..a0b7e6b
>>> --- /dev/null
>>> +++ b/helper/iplookuptable.c
>>> @@ -0,0 +1,974 @@
>>> +/* Copyright (c) 2015, Linaro Limited
>>> + * All rights reserved.
>>> + *
>>> + * SPDX-License-Identifier:     BSD-3-Clause
>>> + */
>>> +
>>> +#include <string.h>
>>> +#include <stdint.h>
>>> +#include <errno.h>
>>> +#include <stdio.h>
>>> +
>>> +#include "odph_iplookuptable.h"
>>> +#include "odph_list_internal.h"
>>> +#include "odph_debug.h"
>>> +#include <odp_api.h>
>>> +
>>> +/** @magic word, write to the first byte of the memory block
>>> + *   to indicate this block is used by a ip lookup table
>>> + */
>>> +#define ODPH_IP_LOOKUP_TABLE_MAGIC_WORD 0xCFCFFCFC
>>> +
>>> +/* The length(bit) of the IPv4 address */
>>> +#define IP_LENGTH 32
>>> +
>>> +/* The number of L1 entries */
>>> +#define ENTRY_NUM_L1               (1 << 16)
>>> +/* The size of one L2\L3 subtree */
>>> +#define ENTRY_NUM_SUBTREE  (1 << 8)
>>> +
>>> +#define WHICH_CHILD(ip, cidr) ((ip >> (IP_LENGTH - cidr)) & 0x00000001)
>>> +
>>> +/** @internal entry struct
>>> + *   Structure store an entry of the ip prefix table.
>>> + *   Because of the leaf pushing, each entry of the table must have
>>> + *   either a child entry, or a nexthop info.
>>> + *   If child == 0 and index != ODP_BUFFER_INVALID, this entry has
>>> + *         a nexthop info, index indicates the buffer that stores the
>>> + *         nexthop value, and ptr points to the address of the buffer.
>>> + *   If child == 1, this entry has a subtree, index indicates
>>> + *         the buffer that stores the subtree, and ptr points to the
>>> + *         address of the buffer.
>>> + */
>>> +typedef struct {
>>> +   union {
>>> +           uint8_t u8;
>>> +           struct {
>>> +#if ODP_BYTE_ORDER == ODP_BIG_ENDIAN
>>> +                   uint8_t child : 1;
>>> +                   uint8_t cidr  : 7;
>>> +#else
>>> +                   uint8_t cidr  : 7;
>>> +                   uint8_t child : 1;
>>> +#endif
>>> +           };
>>> +   };
>>> +   odp_buffer_t index;
>>> +   void *ptr;
>>> +} prefix_entry_t;
>>> +
>>> +/** @internal trie node struct
>>> + *  In this IP lookup algorithm, we use a
>>> + *  binary tire to detect the overlap prefix.
>>> + */
>>> +typedef struct trie_node {
>>> +   /* tree structure */
>>> +   struct trie_node *parent;
>>> +   struct trie_node *left;
>>> +   struct trie_node *right;
>>> +   /* IP prefix length */
>>> +   uint8_t cidr;
>>> +   /* Nexthop buffer index */
>>> +   odp_buffer_t nexthop;
>>> +   /* Buffer that stores this node */
>>> +   odp_buffer_t buffer;
>>> +} trie_node_t;
>>> +
>>> +/** Number of L2\L3 entries(subtrees) per cache cube. */
>>> +#define CACHE_NUM_SUBTREE  (1 << 13)
>>> +/** Number of nexthop values per cache cube. */
>>> +#define CACHE_NUM_NEXTHOP  (1 << 20)
>>> +/** Number of trie nodes per cache cube. */
>>> +#define CACHE_NUM_TRIE             (1 << 20)
>>> +
>>> +/** @typedef cache_type_t
>>> + * Cache node type
>>> + */
>>> +typedef enum {
>>> +   CACHE_TYPE_SUBTREE = 0,
>>> +   CACHE_TYPE_TRIE,
>>> +   CACHE_TYPE_NEXTHOP
>>> +} cache_type_t;
>>> +
>>> +/** A IP lookup table structure. */
>>> +typedef struct {
>>> +   /**< for check */
>>> +   uint32_t magicword;
>>> +   /** Name of the hash. */
>>> +   char name[ODPH_TABLE_NAME_LEN];
>>> +   /** Total L1 entries. */
>>> +   prefix_entry_t *l1e;
>>> +   /** Root node of the binary trie */
>>> +   trie_node_t *trie;
>>> +   /** Length of value. */
>>> +   uint32_t nexthop_len;
>>> +   /** Queues of free slots (caches)
>>> +    *  There are three queues:
>>> +    *  - free_slots[CACHE_TYPE_SUBTREE] is used for L2 and
>>> +    *    L3 entries (subtrees). Each buffer stores an 8-bit
>>> +    *    subtree.
>>> +    *  - free_slots[CACHE_TYPE_TRIE] is used for the binary
>>> +    *    trie. Each buffer contains a trie node.
>>> +    *  - free_slots[CACHE_TYPE_NEXTHOP] is used for the
>>> +    *    nexthop information storage.
>>> +    */
>>> +   odp_queue_t free_slots[3];
>>> +   /** The number of pool used by each queue. */
>>> +   uint32_t cache_count[3];
>>> +} odph_iplookup_table_impl ODP_ALIGNED_CACHE;
>>> +
>>> +/***********************************************************
>>> + *****************   Cache management   ********************
>>> + ***********************************************************/
>>> +
>>> +/** Destroy all caches */
>>> +static void
>>> +cache_destroy(odph_iplookup_table_impl *impl)
>>> +{
>>> +   odp_queue_t queue;
>>> +   odp_event_t ev;
>>> +   uint32_t i = 0, count = 0;
>>> +   char pool_name[ODPH_TABLE_NAME_LEN + 8];
>>> +
>>> +   /* free all buffers in the queue */
>>> +   for (; i < 3; i++) {
>>> +           queue = impl->free_slots[i];
>>> +           if (queue == ODP_QUEUE_INVALID)
>>> +                   continue;
>>> +
>>> +           while ((ev = odp_queue_deq(queue))
>>> +                           != ODP_EVENT_INVALID) {
>>> +                   odp_buffer_free(odp_buffer_from_event(ev));
>>> +           }
>>> +           odp_queue_destroy(queue);
>>> +   }
>>> +
>>> +   /* destroy all cache pools */
>>> +   for (i = 0; i < 3; i++) {
>>> +           for (count = 0; count < impl->cache_count[i]; count++) {
>>> +                   sprintf(
>>> +                                   pool_name, "%s_%d_%d",
>>> +                                   impl->name, i, count);
>>> +                   odp_pool_destroy(odp_pool_lookup(pool_name));
>>> +           }
>>> +   }
>>> +}
>>> +
>>> +/** According to the type of cahce, set the value of
>>> + *  a buffer to the initial value.
>>> + */
>>> +static void
>>> +cache_init_buffer(odp_buffer_t buffer, cache_type_t type, uint32_t size) {
>>> +   int i = 0;
>>> +   void *addr = odp_buffer_addr(buffer);
>>> +
>>> +   memset(addr, 0, size);
>>> +   if (type == CACHE_TYPE_SUBTREE) {
>>> +           prefix_entry_t *entry = (prefix_entry_t *)addr;
>>> +
>>> +           for (i = 0; i < ENTRY_NUM_SUBTREE; i++, entry++) {
>>> +                   entry->cidr = 0;
>>> +                   entry->index = ODP_BUFFER_INVALID;
>>> +                   entry->ptr = NULL;
>>> +           }
>>> +   } else if (type == CACHE_TYPE_TRIE) {
>>> +           trie_node_t *node = (trie_node_t *)addr;
>>> +
>>> +           node->buffer = buffer;
>>> +           node->nexthop = ODP_BUFFER_INVALID;
>>> +   }
>>> +}
>>> +
>>> +/** Create a new buffer pool, and insert its buffer into the queue. */
>>> +static int
>>> +cache_alloc_new_pool(
>>> +           odph_iplookup_table_impl *tbl, cache_type_t type)
>>> +{
>>> +   odp_pool_t pool;
>>> +   odp_pool_param_t param;
>>> +   odp_queue_t queue = tbl->free_slots[type];
>>> +
>>> +   odp_buffer_t buffer;
>>> +   char pool_name[ODPH_TABLE_NAME_LEN + 8];
>>> +   char queue_name[ODPH_TABLE_NAME_LEN + 10];
>>> +   uint32_t size = 0, num = 0;
>>> +
>>> +   /* Create new pool (new free buffers). */
>>> +   param.type = ODP_POOL_BUFFER;
>>> +   param.buf.align = ODP_CACHE_LINE_SIZE;
>>> +   if (type == CACHE_TYPE_SUBTREE) {
>>> +           num = CACHE_NUM_SUBTREE;
>>> +           size = sizeof(prefix_entry_t) * ENTRY_NUM_SUBTREE;
>>> +   } else if (type == CACHE_TYPE_TRIE) {
>>> +           num = CACHE_NUM_TRIE;
>>> +           size = sizeof(trie_node_t);
>>> +   } else if (type == CACHE_TYPE_NEXTHOP) {
>>> +           num = CACHE_NUM_NEXTHOP;
>>> +           size = tbl->nexthop_len;
>>> +   } else {
>>> +           ODPH_DBG("wrong cache_type_t.\n");
>>> +           return -1;
>>> +   }
>>> +   param.buf.size = size;
>>> +   param.buf.num = num;
>>> +
>>> +   sprintf(
>>> +                   pool_name, "%s_%d_%d",
>>> +                   tbl->name, type, tbl->cache_count[type]);
>>> +   pool = odp_pool_create(pool_name, &param);
>>> +   if (pool == ODP_POOL_INVALID) {
>>> +           ODPH_DBG("failed to create a new pool.\n");
>>> +           return -1;
>>> +   }
>>> +
>>> +   /* insert new free buffers into queue */
>>> +   while ((buffer = odp_buffer_alloc(pool))
>>> +                   != ODP_BUFFER_INVALID) {
>>> +           cache_init_buffer(buffer, type, size);
>>> +           odp_queue_enq(queue, odp_buffer_to_event(buffer));
>>> +   }
>>> +
>>> +   tbl->cache_count[type]++;
>>> +   return 0;
>>> +}
>>> +
>>> +/** Get a new buffer from a cache list. If there is no
>>> + *  available buffer, allocate a new pool.
>>> + */
>>> +static odp_buffer_t
>>> +cache_get_buffer(odph_iplookup_table_impl *tbl, cache_type_t type)
>>> +{
>>> +   odp_buffer_t buffer = ODP_BUFFER_INVALID;
>>> +   odp_queue_t queue = tbl->free_slots[type];
>>> +   uint32_t size = 0;
>>> +
>>> +   /* get free buffer from queue */
>>> +   buffer = odp_buffer_from_event(
>>> +                           odp_queue_deq(queue));
>>> +
>>> +   /* If there is no free buffer available, allocate new pool */
>>> +   if (buffer == ODP_BUFFER_INVALID) {
>>> +           cache_alloc_new_pool(tbl, type);
>>> +           buffer = odp_buffer_from_event(odp_queue_deq(queue));
>>> +   }
>>> +
>>> +   return buffer;
>>> +}
>>> +
>>> +/***********************************************************
>>> + ******************     Binary trie     ********************
>>> + ***********************************************************/
>>> +
>>> +/* Initialize the root node of the trie */
>>> +static int
>>> +trie_init(odph_iplookup_table_impl *tbl)
>>> +{
>>> +   trie_node_t *root = NULL;
>>> +   odp_buffer_t buffer = cache_get_buffer(tbl, CACHE_TYPE_TRIE);
>>> +
>>> +   if (buffer != ODP_BUFFER_INVALID) {
>>> +           root = (trie_node_t *)odp_buffer_addr(buffer);
>>> +           root->cidr = 0;
>>> +           tbl->trie = root;
>>> +           return 0;
>>> +   }
>>> +
>>> +   return -1;
>>> +}
>>> +
>>> +/* Destroy the whole trie (recursively) */
>>> +static void
>>> +trie_destroy(odph_iplookup_table_impl *tbl, trie_node_t *trie)
>>> +{
>>> +   if (trie->left != NULL)
>>> +           trie_destroy(tbl, trie->left);
>>> +   if (trie->right != NULL)
>>> +           trie_destroy(tbl, trie->right);
>>> +
>>> +   /* destroy this node */
>>> +   if (trie->nexthop != ODP_BUFFER_INVALID) {
>>> +           odp_queue_enq(
>>> +                           tbl->free_slots[CACHE_TYPE_NEXTHOP],
>>> +                           odp_buffer_to_event(trie->nexthop));
>>> +   }
>>> +   odp_queue_enq(
>>> +                   tbl->free_slots[CACHE_TYPE_TRIE],
>>> +                   odp_buffer_to_event(trie->buffer));
>>> +}
>>> +
>>> +/* Insert a new prefix node into the trie
>>> + * If the node is already existed, update its nexthop info,
>>> + *   Return 0 and set nexthop pointer to INVALID.
>>> + * If the node is not exitsed, create this target node and
>>> + *   all nodes along the path from root to the target node.
>>> + *   Then return 0 and set nexthop pointer points to the
>>> + *   new buffer.
>>> + * Return -1 for error.
>>> + */
>>> +static int
>>> +trie_insert_node(
>>> +           odph_iplookup_table_impl *tbl, trie_node_t *root,
>>> +           uint32_t ip, uint8_t cidr, void *value,
>>> +           odp_buffer_t *nexthop)
>>> +{
>>> +   uint8_t level = 0, child;
>>> +   odp_buffer_t buf;
>>> +   trie_node_t *node = root, *prev = root;
>>> +   void *val_buf = NULL;
>>> +
>>> +   *nexthop = ODP_BUFFER_INVALID;
>>> +
>>> +   /* create/update all nodes along the path
>>> +    * from root to the new node. */
>>> +   for (level = 1; level <= cidr; level++) {
>>> +           child = WHICH_CHILD(ip, level);
>>> +
>>> +           node = child == 0 ? prev->left : prev->right;
>>> +           /* If the child node doesn't exit, create it. */
>>> +           if (node == NULL) {
>>> +                   buf = cache_get_buffer(tbl, CACHE_TYPE_TRIE);
>>> +                   if (buf == ODP_BUFFER_INVALID)
>>> +                           return -1;
>>> +
>>> +                   node = (trie_node_t *)odp_buffer_addr(buf);
>>> +                   node->cidr = level;
>>> +                   node->parent = prev;
>>> +
>>> +                   if (child == 0)
>>> +                           prev->left = node;
>>> +                   else
>>> +                           prev->right = node;
>>> +           }
>>> +           prev = node;
>>> +   }
>>> +
>>> +   /* The final one is the target. If its nexthop buffer
>>> +    * is invalid, create a new buffer.
>>> +    */
>>> +   if (node->nexthop == ODP_BUFFER_INVALID) {
>>> +           buf = cache_get_buffer(tbl, CACHE_TYPE_NEXTHOP);
>>> +           if (buf == ODP_BUFFER_INVALID)
>>> +                   return -1;
>>> +           val_buf = odp_buffer_addr(buf);
>>> +           *nexthop = buf;
>>> +           node->nexthop = buf;
>>> +   }
>>> +   /* copy the value into the nexthop buffer */
>>> +   memcpy(val_buf, value, tbl->nexthop_len);
>>> +   return 0;
>>> +}
>>> +
>>> +/* Delete a node */
>>> +static int
>>> +trie_delete_node(
>>> +           odph_iplookup_table_impl *tbl,
>>> +           trie_node_t *root, uint32_t ip, uint8_t cidr)
>>> +{
>>> +   if (root == NULL)
>>> +           return -1;
>>> +
>>> +   /* The default prefix (root node) cannot be deleted. */
>>> +   if (cidr == 0)
>>> +           return -1;
>>> +
>>> +   trie_node_t *node = root, *prev = NULL;
>>> +   uint8_t level = 1, child = 0;
>>> +   odp_buffer_t tmp;
>>> +
>>> +   /* Find the target node. */
>>> +   for (level = 1; level <= cidr; level++) {
>>> +           child = WHICH_CHILD(ip, level);
>>> +           node = (child == 0) ? node->left : node->right;
>>> +           if (node == NULL) {
>>> +                   ODPH_DBG("Trie node is not existed\n");
>>> +                   return -1;
>>> +           }
>>> +   }
>>> +
>>> +   /* free nexthop buffer */
>>> +   if (node->nexthop != ODP_BUFFER_INVALID) {
>>> +           cache_init_buffer(
>>> +                           node->nexthop, CACHE_TYPE_NEXTHOP,
>>> +                           tbl->nexthop_len);
>>> +           odp_queue_enq(
>>> +                           tbl->free_slots[CACHE_TYPE_NEXTHOP],
>>> +                           odp_buffer_to_event(node->nexthop));
>>> +           node->nexthop = ODP_BUFFER_INVALID;
>>> +   }
>>> +
>>> +   /* Delete all redundant nodes along the path. */
>>> +   for (level = cidr; level > 0; level--) {
>>> +           if (
>>> +                   node->left != NULL || node->right != NULL ||
>>> +                   node->nexthop != ODP_BUFFER_INVALID)
>>> +                   break;
>>> +
>>> +           child = WHICH_CHILD(ip, level);
>>> +           prev = node->parent;
>>> +
>>> +           /* free trie node */
>>> +           tmp = node->buffer;
>>> +           cache_init_buffer(
>>> +                           tmp, CACHE_TYPE_TRIE, sizeof(trie_node_t));
>>> +           odp_queue_enq(
>>> +                           tbl->free_slots[CACHE_TYPE_TRIE],
>>> +                           odp_buffer_to_event(tmp));
>>> +
>>> +           if (child == 0)
>>> +                   prev->left = NULL;
>>> +           else
>>> +                   prev->right = NULL;
>>> +           node = prev;
>>> +   }
>>> +   return 0;
>>> +}
>>> +
>>> +/* Detect the longest overlapping prefix. */
>>> +static int
>>> +trie_detect_overlap(
>>> +           trie_node_t *trie, uint32_t ip, uint8_t cidr,
>>> +           uint8_t leaf_push, uint8_t *over_cidr,
>>> +           odp_buffer_t *over_nexthop)
>>> +{
>>> +   uint8_t child = 0;
>>> +   uint32_t level, limit = cidr > leaf_push ? leaf_push + 1 : cidr;
>>> +   trie_node_t *node = trie, *longest = trie;
>>> +
>>> +   for (level = 1; level < limit; level++) {
>>> +           child = WHICH_CHILD(ip, level);
>>> +           node = (child == 0) ? node->left : node->right;
>>> +           if (node->nexthop != ODP_BUFFER_INVALID)
>>> +                   longest = node;
>>> +   }
>>> +
>>> +   *over_cidr = longest->cidr;
>>> +   *over_nexthop = longest->nexthop;
>>> +   return 0;
>>> +}
>>> +
>>> +/***********************************************************
>>> + ***************   IP prefix lookup table   ****************
>>> + ***********************************************************/
>>> +
>>> +odph_table_t
>>> +odph_iplookup_table_lookup(const char *name)
>>> +{
>>> +   odph_iplookup_table_impl *tbl = NULL;
>>> +
>>> +   if (name == NULL || strlen(name) >= ODPH_TABLE_NAME_LEN)
>>> +           return NULL;
>>> +
>>> +   tbl = (odph_iplookup_table_impl *)odp_shm_addr(odp_shm_lookup(name));
>>> +
>>> +   if (
>>> +           tbl != NULL &&
>>> +           tbl->magicword == ODPH_IP_LOOKUP_TABLE_MAGIC_WORD &&
>>> +           strcmp(tbl->name, name) == 0)
>>> +           return (odph_table_t)tbl;
>>> +}
>>> +
>>> +odph_table_t
>>> +odph_iplookup_table_create(
>>> +           const char *name, uint32_t ODP_IGNORED_1,
>>> +           uint32_t ODP_IGNORED_2, uint32_t value_size)
>>> +{
>>> +   odph_iplookup_table_impl *tbl;
>>> +   odp_shm_t shm_tbl;
>>> +   odp_queue_t queue;
>>> +   odp_queue_param_t qparam;
>>> +
>>> +   unsigned i;
>>> +   uint32_t impl_size, l1_size;
>>> +   char queue_name[ODPH_TABLE_NAME_LEN + 2];
>>> +
>>> +   /* Check for valid parameters */
>>> +   if (strlen(name) == 0) {
>>> +           ODPH_DBG("invalid parameters\n");
>>> +           return NULL;
>>> +   }
>>> +
>>> +   /* Guarantee there's no existing */
>>> +   tbl = (odph_iplookup_table_impl *)odph_iplookup_table_lookup(name);
>>> +   if (tbl != NULL) {
>>> +           ODPH_DBG("IP prefix table %s already exists\n", name);
>>> +           return NULL;
>>> +   }
>>> +
>>> +   /* Calculate the sizes of different parts of IP prefix table */
>>> +   impl_size = sizeof(odph_iplookup_table_impl);
>>> +   l1_size = sizeof(prefix_entry_t) * ENTRY_NUM_L1;
>>> +
>>> +   shm_tbl = odp_shm_reserve(
>>> +                           name, impl_size + l1_size,
>>> +                           ODP_CACHE_LINE_SIZE, ODP_SHM_SW_ONLY);
>>> +
>>> +   if (shm_tbl == ODP_SHM_INVALID) {
>>> +           ODPH_DBG(
>>> +                   "shm allocation failed for odph_iplookup_table_impl 
>>> %s\n",
>>> +                   name);
>>> +           return NULL;
>>> +   }
>>> +
>>> +   tbl = (odph_iplookup_table_impl *)odp_shm_addr(shm_tbl);
>>> +   memset(tbl, 0, impl_size + l1_size);
>>> +
>>> +   /* header of this mem block is the table impl struct,
>>> +    * then the l1 entries array.
>>> +    */
>>> +   tbl->l1e = (prefix_entry_t *)((char *)tbl + impl_size);
>>> +   for (i = 0; i < ENTRY_NUM_L1; i++) {
>>> +           tbl->l1e[i].index = ODP_BUFFER_INVALID;
>>> +           tbl->l1e[i].ptr = NULL;
>>> +   }
>>> +
>>> +   /* Setup table context. */
>>> +   snprintf(tbl->name, sizeof(tbl->name), "%s", name);
>>> +   tbl->magicword = ODPH_IP_LOOKUP_TABLE_MAGIC_WORD;
>>> +   tbl->nexthop_len = value_size;
>>> +
>>> +   /* Initialize cache */
>>> +   for (i = 0; i < 3; i++) {
>>> +           tbl->cache_count[i] = 0;
>>> +
>>> +           odp_queue_param_init(&qparam);
>>> +           qparam.type = ODP_QUEUE_TYPE_PLAIN;
>>> +           sprintf(queue_name, "%s_%d", name, i);
>>> +           queue = odp_queue_create(queue_name, &qparam);
>>> +           if (queue == ODP_QUEUE_INVALID) {
>>> +                   ODPH_DBG("failed to create queue");
>>> +                   cache_destroy(tbl);
>>> +                   return NULL;
>>> +           }
>>> +           tbl->free_slots[i] = queue;
>>> +           cache_alloc_new_pool(tbl, i);
>>> +   }
>>> +
>>> +   /* Initialize tire */
>>> +   if (trie_init(tbl) < 0) {
>>> +           odp_shm_free(shm_tbl);
>>> +           return NULL;
>>> +   }
>>> +
>>> +   return (odph_table_t)tbl;
>>> +}
>>> +
>>> +int
>>> +odph_iplookup_table_destroy(odph_table_t tbl)
>>> +{
>>> +   int i, j;
>>> +   odph_iplookup_table_impl *impl = NULL;
>>> +   prefix_entry_t *subtree = NULL;
>>> +
>>> +   if (tbl == NULL)
>>> +           return -1;
>>> +
>>> +   impl = (odph_iplookup_table_impl *)tbl;
>>> +
>>> +   /* check magic word */
>>> +   if (impl->magicword != ODPH_IP_LOOKUP_TABLE_MAGIC_WORD) {
>>> +           ODPH_DBG("wrong magicword for IP prefix table\n");
>>> +           return -1;
>>> +   }
>>> +
>>> +   /* destroy trie */
>>> +   trie_destroy(impl, impl->trie);
>>> +
>>> +   /* free all L2 and L3 entries */
>>> +   for (i = 0; i < ENTRY_NUM_L1; i++) {
>>> +           if ((impl->l1e[i]).child == 0)
>>> +                   continue;
>>> +
>>> +           subtree = (prefix_entry_t *)odp_buffer_addr(
>>> +                                           impl->l1e[i].index);
>>> +           /* destroy all l3 subtrees of this l2 subtree */
>>> +           for (j = 0; j < ENTRY_NUM_SUBTREE; j++) {
>>> +                   if (subtree[j].child == 0)
>>> +                           continue;
>>> +                   odp_queue_enq(
>>> +                                   impl->free_slots[CACHE_TYPE_TRIE],
>>> +                                   odp_buffer_to_event(subtree[j].index));
>>> +           }
>>> +           /* destroy this l2 subtree */
>>> +           odp_queue_enq(
>>> +                           impl->free_slots[CACHE_TYPE_TRIE],
>>> +                           odp_buffer_to_event(impl->l1e[i].index));
>>> +   }
>>> +
>>> +   /* destroy all cache */
>>> +   cache_destroy(impl);
>>> +
>>> +   /* free impl */
>>> +   odp_shm_free(odp_shm_lookup(impl->name));
>>> +}
>>> +
>>> +/* Insert the prefix into level x
>>> + * Return:
>>> + *   -1    error
>>> + *   0     the table is unmodified
>>> + *   1     the table is modified
>>> + */
>>> +static int
>>> +prefix_insert_into_lx(
>>> +           odph_iplookup_table_impl *tbl, prefix_entry_t *entry,
>>> +           uint8_t cidr, odp_buffer_t nexthop, uint8_t level)
>>> +{
>>> +   uint8_t ret = 0;
>>> +   uint32_t i = 0, limit = (1 << (level - cidr));
>>> +   prefix_entry_t *e = entry, *ne = NULL;
>>> +
>>> +   for (i = 0; i < limit; i++, e++) {
>>> +           if (e->child == 1) {
>>> +                   if (e->cidr > cidr)
>>> +                           continue;
>>> +
>>> +                   e->cidr = cidr;
>>> +                   if (e->index == ODP_BUFFER_INVALID) {
>>> +                           ODPH_DBG("Subtree is not existed.\n");
>>> +                           return -1;
>>> +                   }
>>> +                   /* push to next level */
>>> +                   ne = odp_buffer_addr(e->index);
>>> +                   ret = prefix_insert_into_lx(
>>> +                                   tbl, ne, cidr, nexthop, cidr + 8);
>>> +           } else {
>>> +                   if (e->cidr > cidr)
>>> +                           continue;
>>> +
>>> +                   e->child = 0;
>>> +                   e->cidr = cidr;
>>> +                   e->index = nexthop;
>>> +                   ret = 1;
>>> +           }
>>> +   }
>>> +   return ret;
>>> +}
>>> +
>>> +static int
>>> +prefix_insert_iter(
>>> +           odph_iplookup_table_impl *tbl, prefix_entry_t *entry,
>>> +           uint32_t ip, uint8_t cidr, odp_buffer_t nexthop,
>>> +           uint8_t level, uint8_t depth)
>>> +{
>>> +   uint8_t state = 0;
>>> +   prefix_entry_t *ne = NULL;
>>> +   int i = 0;
>>> +
>>> +   /* If child subtree is existed, get it. */
>>> +   if (entry->child) {
>>> +           ne = (prefix_entry_t *)odp_buffer_addr(entry->index);
>>> +   } else {
>>> +           /* If the child is not existed, create a new subtree. */
>>> +           odp_buffer_t buf, push = entry->index;
>>> +
>>> +           buf = cache_get_buffer(tbl, CACHE_TYPE_SUBTREE);
>>> +           if (buf == ODP_BUFFER_INVALID) {
>>> +                   ODPH_DBG("failed to get subtree buffer from cache.\n");
>>> +                   return -1;
>>> +           }
>>> +           ne = (prefix_entry_t *)odp_buffer_addr(buf);
>>> +
>>> +           /* initialize subtree */
>>> +           for (; i < ENTRY_NUM_SUBTREE; i++)
>>> +                   ne[i].index = ODP_BUFFER_INVALID;
>>> +
>>> +           entry->child = 1;
>>> +           entry->index = buf;
>>> +           entry->ptr = ne;
>>> +
>>> +           /* If this entry contains a nexthop and a small cidr,
>>> +            * push it to the next level.
>>> +            */
>>> +           if (entry->cidr > 0) {
>>> +                   state = prefix_insert_into_lx(
>>> +                                   tbl, ne, entry->cidr,
>>> +                                   push, entry->cidr + 8);
>>> +           }
>>> +   }
>>> +
>>> +   ne += (ip >> 24);
>>> +   if (cidr <= 8) {
>>> +           state = prefix_insert_into_lx(
>>> +                           tbl, ne, cidr + depth * 8, nexthop, level);
>>> +   } else {
>>> +           state = prefix_insert_iter(
>>> +                           tbl, ne, ip << 8, cidr - 8,
>>> +                           nexthop, level + 8, depth + 1);
>>> +   }
>>> +
>>> +   return state;
>>> +}
>>> +
>>> +int
>>> +odph_iplookup_table_put_value(odph_table_t tbl, void *key, void *value)
>>> +{
>>> +   if ((tbl == NULL) || (key == NULL) || (value == NULL))
>>> +           return -1;
>>> +
>>> +   odph_iplookup_table_impl *impl = (odph_iplookup_table_impl *)tbl;
>>> +   odph_iplookup_prefix_t *prefix = (odph_iplookup_prefix_t *)key;
>>> +   prefix_entry_t *l1e = NULL;
>>> +   odp_buffer_t nexthop;
>>> +   int ret = 0;
>>> +
>>> +   if (prefix->cidr == 0)
>>> +           return -1;
>>> +   prefix->ip = prefix->ip & (0xffffffff << (IP_LENGTH - prefix->cidr));
>>> +
>>> +   /* insert into trie */
>>> +   ret = trie_insert_node(
>>> +                           impl, impl->trie, prefix->ip, prefix->cidr,
>>> +                           value, &nexthop);
>>> +
>>> +   if (ret < 0) {
>>> +           ODPH_DBG("failed to insert into trie\n");
>>> +           return -1;
>>> +   }
>>> +
>>> +   /* If the prefix is already existed, trie_insert_node has
>>> +    * already updated the corresponding nexthop buffer. The
>>> +    * insertion finished.
>>> +    */
>>> +   if (nexthop == ODP_BUFFER_INVALID)
>>> +           return 0;
>>> +
>>> +   /* get L1 entry */
>>> +   l1e = &impl->l1e[prefix->ip >> 16];
>>> +
>>> +   if (prefix->cidr <= 16) {
>>> +           ret = prefix_insert_into_lx(
>>> +                           impl, l1e, prefix->cidr, nexthop, 16);
>>> +   } else {
>>> +           ret = prefix_insert_iter(
>>> +                           impl, l1e,
>>> +                           ((prefix->ip) << 16), prefix->cidr - 16,
>>> +                           nexthop, 24, 2);
>>> +   }
>>> +
>>> +   return ret;
>>> +}
>>> +
>>> +int
>>> +odph_iplookup_table_get_value(
>>> +           odph_table_t tbl, void *key, void *buffer, uint32_t buffer_size)
>>> +{
>>> +   if ((tbl == NULL) || (key == NULL) || (buffer == NULL))
>>> +           return -EINVAL;
>>> +
>>> +   odph_iplookup_table_impl *impl = (odph_iplookup_table_impl *)tbl;
>>> +   uint32_t ip = *((uint32_t *)key);
>>> +   prefix_entry_t *entry = &impl->l1e[ip >> 16];
>>> +
>>> +   if (entry == NULL) {
>>> +           ODPH_DBG("failed to get L1 entry.\n");
>>> +           return -1;
>>> +   }
>>> +
>>> +   ip <<= 16;
>>> +   while (entry->child) {
>>> +           entry = (prefix_entry_t *)entry->ptr;
>>> +           entry += ip >> 24;
>>> +           ip <<= 8;
>>> +   }
>>> +
>>> +   /* copy data */
>>> +   if (entry->index == ODP_BUFFER_INVALID) {
>>> +           /* ONLY match the default prefix */
>>> +           memset(buffer, 0, impl->nexthop_len);
>>> +   } else {
>>> +           memcpy(
>>> +                   buffer, odp_buffer_addr(entry->index),
>>> +                   impl->nexthop_len);
>>> +   }
>>> +
>>> +   return 0;
>>> +}
>>> +
>>> +static int
>>> +prefix_delete_lx(
>>> +           odph_iplookup_table_impl *tbl, prefix_entry_t *l1e,
>>> +           uint8_t cidr, uint8_t over_cidr, odp_buffer_t over_nexthop,
>>> +           uint8_t level)
>>> +{
>>> +   uint8_t ret, flag = 1;
>>> +   prefix_entry_t *e = l1e;
>>> +   uint32_t i = 0, limit = 1 << (level - cidr);
>>> +
>>> +   for (i = 0; i < limit; i++, e++) {
>>> +           if (e->child == 1) {
>>> +                   if (e->cidr > cidr) {
>>> +                           flag = 0;
>>> +                           continue;
>>> +                   }
>>> +
>>> +                   prefix_entry_t *ne = (prefix_entry_t *)odp_buffer_addr(
>>> +                                   e->index);
>>> +
>>> +                   e->cidr = over_cidr;
>>> +                   ret = prefix_delete_lx(
>>> +                                   tbl, ne, cidr, over_cidr,
>>> +                                   over_nexthop, cidr + 8);
>>> +
>>> +                   /* If ret == 1, the next 2^8 entries equal to
>>> +                    * (over_cidr, over_nexthop). In this case, we
>>> +                    * should not push the (over_cidr, over_nexthop)
>>> +                    * to the next level. In fact, we should recycle
>>> +                    * the next 2^8 entries.
>>> +                    */
>>> +                   if (ret) {
>>> +                           /* destroy subtree */
>>> +                           cache_init_buffer(
>>> +                                   e->index, CACHE_TYPE_SUBTREE,
>>> +                                   sizeof(prefix_entry_t)
>>> +                                           * ENTRY_NUM_SUBTREE);
>>> +                           odp_queue_enq(
>>> +                                   tbl->free_slots[CACHE_TYPE_SUBTREE],
>>> +                                   odp_buffer_to_event(e->index));
>>> +                           e->child = 0;
>>> +                           e->index = over_nexthop;
>>> +                   } else {
>>> +                           flag = 0;
>>> +                   }
>>> +           } else {
>>> +                   if (e->cidr > cidr) {
>>> +                           flag = 0;
>>> +                           continue;
>>> +                   } else {
>>> +                           e->cidr = over_cidr;
>>> +                           e->index = over_nexthop;
>>> +                   }
>>> +           }
>>> +   }
>>> +   return flag;
>>> +}
>>> +
>>> +/* Check if the entry can be recycled.
>>> + * An entry can be recycled duo to two reasons:
>>> + * - all children of the entry are the same,
>>> + * - all children of the entry have a cidr smaller than the level
>>> + *   bottom bound.
>>> + */
>>> +static uint8_t
>>> +can_recycle(prefix_entry_t *e, uint32_t level)
>>> +{
>>> +   uint8_t recycle = 1;
>>> +   int i = 1;
>>> +   prefix_entry_t *ne = (prefix_entry_t *)odp_buffer_addr(e->index);
>>> +
>>> +   if (ne->child)
>>> +           return 0;
>>> +
>>> +   uint8_t cidr = ne->cidr;
>>> +   odp_buffer_t index = ne->index;
>>> +
>>> +   if (cidr > level)
>>> +           return 0;
>>> +
>>> +   ne++;
>>> +   for (; i < 256; i++, ne++) {
>>> +           if (ne->child != 0 || ne->cidr != cidr || ne->index != index) {
>>> +                   recycle = 0;
>>> +                   break;
>>> +           }
>>> +   }
>>> +   return recycle;
>>> +}
>>> +
>>> +static uint8_t
>>> +prefix_delete_iter(
>>> +           odph_iplookup_table_impl *tbl, prefix_entry_t *e,
>>> +           uint32_t ip, uint8_t cidr, uint8_t level, uint8_t depth)
>>> +{
>>> +   uint8_t ret = 0, over_cidr;
>>> +   odp_buffer_t over_nexthop;
>>> +
>>> +   trie_detect_overlap(
>>> +                   tbl->trie, ip, cidr + 8 * depth, level,
>>> +                   &over_cidr, &over_nexthop);
>>> +   if (cidr > 8) {
>>> +           prefix_entry_t *ne =
>>> +                   (prefix_entry_t *)odp_buffer_addr(e->index);
>>> +           prefix_entry_t *org_ne = ne;
>>> +
>>> +           ne += ((uint32_t)(ip << level) >> 24);
>>> +           ret = prefix_delete_iter(
>>> +                           tbl, ne, ip, cidr - 8, level + 8, depth + 1);
>>> +
>>> +           if (ret && can_recycle(e, level)) {
>>> +                   /* destroy subtree */
>>> +                   cache_init_buffer(
>>> +                           e->index, CACHE_TYPE_SUBTREE,
>>> +                           sizeof(prefix_entry_t) * ENTRY_NUM_SUBTREE);
>>> +                   odp_queue_enq(
>>> +                           tbl->free_slots[CACHE_TYPE_SUBTREE],
>>> +                           odp_buffer_to_event(e->index));
>>> +                   e->child = 0;
>>> +                   e->index = over_nexthop;
>>> +                   e->cidr = over_cidr;
>>> +                   return 1;
>>> +           }
>>> +           return 0;
>>> +   }
>>> +
>>> +   ret = prefix_delete_lx(
>>> +                   tbl, e, cidr + 8 * depth,
>>> +                   over_cidr, over_nexthop, level);
>>> +   return ret;
>>> +}
>>> +
>>> +int
>>> +odph_iplookup_table_remove_value(odph_table_t tbl, void *key)
>>> +{
>>> +   if ((tbl == NULL) || (key == NULL))
>>> +           return -EINVAL;
>>> +
>>> +   odph_iplookup_table_impl *impl = (odph_iplookup_table_impl *)tbl;
>>> +   odph_iplookup_prefix_t *prefix = (odph_iplookup_prefix_t *)key;
>>> +   uint32_t ip = prefix->ip;
>>> +   uint8_t cidr = prefix->cidr;
>>> +
>>> +   if (prefix->cidr < 0)
>>> +           return -EINVAL;
>>> +
>>> +   prefix_entry_t *entry = &impl->l1e[ip >> 16];
>>> +   uint8_t over_cidr, ret;
>>> +   odp_buffer_t over_nexthop;
>>> +
>>> +   trie_detect_overlap(
>>> +                   impl->trie, ip, cidr, 16, &over_cidr, &over_nexthop);
>>> +
>>> +   if (cidr <= 16) {
>>> +           prefix_delete_lx(
>>> +                   impl, entry, cidr, over_cidr, over_nexthop, 16);
>>> +   } else {
>>> +           prefix_entry_t *ne = (prefix_entry_t *)odp_buffer_addr(
>>> +                           entry->index);
>>> +           prefix_entry_t *org_ne = ne;
>>> +
>>> +           ne += ((uint32_t)(ip << 16) >> 24);
>>> +           ret = prefix_delete_iter(impl, ne, ip, cidr - 16, 24, 2);
>>> +
>>> +           if (ret && can_recycle(entry, 16)) {
>>> +                   /* destroy subtree */
>>> +                   cache_init_buffer(
>>> +                           entry->index, CACHE_TYPE_SUBTREE,
>>> +                           sizeof(prefix_entry_t) * ENTRY_NUM_SUBTREE);
>>> +                   odp_queue_enq(
>>> +                           impl->free_slots[CACHE_TYPE_SUBTREE],
>>> +                           odp_buffer_to_event(entry->index));
>>> +                   entry->child = 0;
>>> +                   entry->cidr = over_cidr;
>>> +                   entry->index = over_nexthop;
>>> +           }
>>> +   }
>>> +
>>> +   return trie_delete_node(impl, impl->trie, ip, cidr);
>>> +}
>>> +
>>> +odph_table_ops_t odph_iplookup_table_ops = {
>>> +   odph_iplookup_table_create,
>>> +   odph_iplookup_table_lookup,
>>> +   odph_iplookup_table_destroy,
>>> +   odph_iplookup_table_put_value,
>>> +   odph_iplookup_table_get_value,
>>> +   odph_iplookup_table_remove_value
>>> +};
>>> diff --git a/helper/odph_iplookuptable.h b/helper/odph_iplookuptable.h
>>> new file mode 100644
>>> index 0000000..e6040ca
>>> --- /dev/null
>>> +++ b/helper/odph_iplookuptable.h
>>> @@ -0,0 +1,58 @@
>>> +/* Copyright (c) 2015, Linaro Limited
>>> + * All rights reserved.
>>> + *
>>> + * SPDX-License-Identifier:    BSD-3-Clause
>>> + */
>>> +
>>> +/**
>>> + * @file
>>> + *
>>> + * ODP IP Lookup Table
>>> + *
>>> + * This is an implementation of the IP lookup table. The key of
>>> + * this table is IPv4 address (32 bits), and the value can be
>>> + * defined by user. This table uses the 16,8,8 ip lookup (longest
>>> + * prefix matching) algorithm.
>>> + */
>>> +
>>> +#ifndef odph_iplookup_TABLE_H_
>>> +#define odph_iplookup_TABLE_H_
>>> +
>>> +#include <odp/helper/table.h>
>>> +
>>> +#ifdef __cplusplus
>>> +extern "C" {
>>> +#endif
>>> +
>>> +typedef struct {
>>> +   uint32_t ip;
>>> +   uint8_t cidr;
>>> +} odph_iplookup_prefix_t;
>>> +
>>> +odph_table_t odph_iplookup_table_create(
>>> +           const char *name,
>>> +           uint32_t ODP_IGNORED_1,
>>> +           uint32_t ODP_IGNORED_2,
>>> +           uint32_t value_size);
>>> +
>>> +odph_table_t odph_iplookup_table_lookup(const char *name);
>>> +
>>> +int odph_iplookup_table_destroy(odph_table_t table);
>>> +
>>> +int odph_iplookup_table_put_value(
>>> +           odph_table_t table, void *key, void *value);
>>> +
>>> +int odph_iplookup_table_get_value(
>>> +           odph_table_t table, void *key,
>>> +           void *buffer, uint32_t buffer_size);
>>> +
>>> +int odph_iplookup_table_remove_value(
>>> +           odph_table_t table, void *key);
>>> +
>>> +extern odph_table_ops_t odph_iplookup_table_ops;
>>> +
>>> +#ifdef __cplusplus
>>> +}
>>> +#endif
>>> +
>>> +#endif /* odph_iplookup_TABLE_H_ */
>>> -- 
>>> 1.9.1
>>> 
>>> 
>>> _______________________________________________
>>> lng-odp mailing list
>>> [email protected]
>>> https://lists.linaro.org/mailman/listinfo/lng-odp
>> 
>> _______________________________________________
>> lng-odp mailing list
>> [email protected]
>> https://lists.linaro.org/mailman/listinfo/lng-odp
> 


_______________________________________________
lng-odp mailing list
[email protected]
https://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to