Introduced Intel TSX-enabled scalable multi-writer Cuckoo hash
insertion.

This patch introduced scalable multi-writer Cuckoo Hash insertion
based on a split Cuckoo Search and Move operation using Intel
TSX. It can do scalable hash insertion with 22 cores with little
performance loss and negligible TSX abortion rate.

* Added an extra rte_hash flag definition to switch default
  single writer Cuckoo Hash behavior to multiwriter.

* Added a make_space_insert_bfs_mw() function to do split Cuckoo
  search in BFS order.

* Added tsx_cuckoo_move_insert() to do Cuckoo move in Intel TSX
  protected manner.

* Added test_hash_multiwriter() as test case for multi-writer
  Cuckoo Hash.

Signed-off-by: Shen Wei <wei1.shen at intel.com>
Signed-off-by: Sameh Gobriel <sameh.gobriel at intel.com>
---
 app/test/Makefile                 |   1 +
 app/test/test_hash_multiwriter.c  | 272 ++++++++++++++++++++++++++++++++++++++
 lib/librte_hash/rte_cuckoo_hash.c | 228 ++++++++++++++++++++++++++++----
 lib/librte_hash/rte_hash.h        |   3 +
 4 files changed, 480 insertions(+), 24 deletions(-)
 create mode 100644 app/test/test_hash_multiwriter.c

diff --git a/app/test/Makefile b/app/test/Makefile
index a4907d5..697837a 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_functions.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_scaling.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_multiwriter.c

 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm6.c
diff --git a/app/test/test_hash_multiwriter.c b/app/test/test_hash_multiwriter.c
new file mode 100644
index 0000000..bdb9840
--- /dev/null
+++ b/app/test/test_hash_multiwriter.c
@@ -0,0 +1,272 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <inttypes.h>
+#include <locale.h>
+
+#include <rte_cycles.h>
+#include <rte_hash.h>
+#include <rte_hash_crc.h>
+#include <rte_launch.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+
+#include "test.h"
+
+/*
+ * Check condition and return an error if true. Assumes that "handle" is the
+ * name of the hash structure pointer to be freed.
+ */
+#define RETURN_IF_ERROR(cond, str, ...) do {                            \
+       if (cond) {                                                     \
+               printf("ERROR line %d: " str "\n", __LINE__,            \
+                                                       ##__VA_ARGS__); \
+               if (handle)                                             \
+                       rte_hash_free(handle);                          \
+               return -1;                                              \
+       }                                                               \
+} while (0)
+
+#define RTE_APP_TEST_HASH_MULTIWRITER_FAILED 0
+
+struct {
+       uint32_t *keys;
+       uint32_t *found;
+       uint32_t nb_tsx_insertion;
+       struct rte_hash *h;
+} tbl_multiwriter_test_params;
+
+const uint32_t nb_entries = 16*1024*1024;
+const uint32_t nb_total_tsx_insertion = 15*1024*1024;
+uint32_t rounded_nb_total_tsx_insertion;
+
+static rte_atomic64_t gcycles;
+static rte_atomic64_t ginsertions;
+
+static int
+test_hash_multiwriter_worker(__attribute__((unused)) void *arg)
+{
+       uint64_t i, offset;
+       uint32_t lcore_id = rte_lcore_id();
+       uint64_t begin, cycles;
+
+       offset = (lcore_id - rte_get_master_lcore())
+               * tbl_multiwriter_test_params.nb_tsx_insertion;
+
+       printf("Core #%d inserting %d: %'"PRId64" - %'"PRId64"\n",
+              lcore_id, tbl_multiwriter_test_params.nb_tsx_insertion,
+              offset, offset + tbl_multiwriter_test_params.nb_tsx_insertion);
+
+       begin = rte_rdtsc_precise();
+
+       for (i = offset;
+            i < offset + tbl_multiwriter_test_params.nb_tsx_insertion;
+            i++) {
+               if (rte_hash_add_key(tbl_multiwriter_test_params.h,
+                                    tbl_multiwriter_test_params.keys + i) < 0)
+                       break;
+       }
+
+       cycles = rte_rdtsc_precise() - begin;
+       rte_atomic64_add(&gcycles, cycles);
+       rte_atomic64_add(&ginsertions, i - offset);
+
+       for (; i < offset + tbl_multiwriter_test_params.nb_tsx_insertion; i++)
+               tbl_multiwriter_test_params.keys[i]
+                       = RTE_APP_TEST_HASH_MULTIWRITER_FAILED;
+
+       return 0;
+}
+
+
+static int
+test_hash_multiwriter(void)
+{
+       unsigned int i, rounded_nb_total_tsx_insertion;
+       static unsigned calledCount = 1;
+
+       uint32_t *keys;
+       uint32_t *found;
+
+       struct rte_hash_parameters hash_params = {
+               .entries = nb_entries,
+               .key_len = sizeof(uint32_t),
+               .hash_func = rte_hash_crc,
+               .hash_func_init_val = 0,
+               .socket_id = rte_socket_id(),
+               .extra_flag = RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
+               | RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD,
+       };
+
+       struct rte_hash *handle;
+       char name[RTE_HASH_NAMESIZE];
+
+       const void *next_key;
+       void *next_data;
+       uint32_t iter = 0;
+
+       uint32_t duplicated_keys = 0;
+       uint32_t lost_keys = 0;
+
+       snprintf(name, 32, "test%u", calledCount++);
+       hash_params.name = name;
+
+       handle = rte_hash_create(&hash_params);
+       RETURN_IF_ERROR(handle == NULL, "hash creation failed");
+
+       tbl_multiwriter_test_params.h = handle;
+       tbl_multiwriter_test_params.nb_tsx_insertion =
+               nb_total_tsx_insertion / rte_lcore_count();
+
+       rounded_nb_total_tsx_insertion = (nb_total_tsx_insertion /
+               tbl_multiwriter_test_params.nb_tsx_insertion)
+               * tbl_multiwriter_test_params.nb_tsx_insertion;
+
+       rte_srand(rte_rdtsc());
+
+       keys = rte_malloc(NULL, sizeof(uint32_t) * nb_entries, 0);
+
+       if (keys == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err1;
+       }
+
+       found = rte_zmalloc(NULL, sizeof(uint32_t) * nb_entries, 0);
+       if (found == NULL) {
+               printf("RTE_ZMALLOC failed\n");
+               goto err2;
+       }
+
+       for (i = 0; i < nb_entries; i++)
+               keys[i] = i;
+
+       tbl_multiwriter_test_params.keys = keys;
+       tbl_multiwriter_test_params.found = found;
+
+       rte_atomic64_init(&gcycles);
+       rte_atomic64_clear(&gcycles);
+
+       rte_atomic64_init(&ginsertions);
+       rte_atomic64_clear(&ginsertions);
+
+       /* Fire all threads. */
+       rte_eal_mp_remote_launch(test_hash_multiwriter_worker,
+                                NULL, CALL_MASTER);
+       rte_eal_mp_wait_lcore();
+
+       while (rte_hash_iterate(handle, &next_key, &next_data, &iter) >= 0) {
+               /* Search for the key in the list of keys added .*/
+               i = *(const uint32_t *)next_key;
+               tbl_multiwriter_test_params.found[i]++;
+       }
+
+       for (i = 0; i < rounded_nb_total_tsx_insertion; i++) {
+               if (tbl_multiwriter_test_params.keys[i]
+                   != RTE_APP_TEST_HASH_MULTIWRITER_FAILED) {
+                       if (tbl_multiwriter_test_params.found[i] > 1) {
+                               duplicated_keys++;
+                               break;
+                       }
+                       if (tbl_multiwriter_test_params.found[i] == 0) {
+                               lost_keys++;
+                               printf("key %d is lost\n", i);
+                               break;
+                       }
+               }
+       }
+
+       if (duplicated_keys > 0) {
+               printf("%d key duplicated\n", duplicated_keys);
+               goto err3;
+       }
+
+       if (lost_keys > 0) {
+               printf("%d key lost\n", lost_keys);
+               goto err3;
+       }
+
+       printf("No key corruped during multiwriter insertion.\n");
+
+       unsigned long long int cycles_per_insertion =
+               rte_atomic64_read(&gcycles)/
+               rte_atomic64_read(&ginsertions);
+
+       printf(" cycles per insertion: %llu\n", cycles_per_insertion);
+
+       rte_free(tbl_multiwriter_test_params.found);
+       rte_free(tbl_multiwriter_test_params.keys);
+       rte_hash_free(handle);
+       return 0;
+
+err3:
+       rte_free(tbl_multiwriter_test_params.found);
+err2:
+       rte_free(tbl_multiwriter_test_params.keys);
+err1:
+       rte_hash_free(handle);
+       return -1;
+}
+
+static int
+test_hash_multiwriter_main(void)
+{
+       int r = -1;
+
+       if (rte_lcore_count() == 1) {
+               printf(
+                       "More than one lcores are required to do multiwriter 
test\n");
+               return r;
+       }
+
+       if (!rte_tm_supported()) {
+               printf(
+                       "Hardware transactional memory (lock elision) is NOT 
supported\n");
+               return r;
+       }
+
+       printf("Hardware transactional memory (lock elision) is supported\n");
+
+       setlocale(LC_NUMERIC, "");
+
+       r = test_hash_multiwriter();
+
+       return r;
+}
+
+
+static struct test_command hash_scaling_cmd = {
+       .command = "hash_multiwriter_autotest",
+       .callback = test_hash_multiwriter_main,
+};
+
+REGISTER_TEST_COMMAND(hash_scaling_cmd);
diff --git a/lib/librte_hash/rte_cuckoo_hash.c 
b/lib/librte_hash/rte_cuckoo_hash.c
index 7b7d1f8..5a01f51 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -100,7 +100,9 @@ EAL_REGISTER_TAILQ(rte_hash_tailq)

 #define KEY_ALIGNMENT                  16

-#define LCORE_CACHE_SIZE               8
+#define LCORE_CACHE_SIZE               64
+
+#define RTE_HASH_BFS_QUEUE_MAX_LEN  5000

 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
 /*
@@ -190,6 +192,7 @@ struct rte_hash {
                                                        memory support */
        struct lcore_cache *local_free_slots;
        /**< Local cache per lcore, storing some indexes of the free slots */
+       uint8_t multiwrite_add; /**< Multi-write safe hash add behavior */
 } __rte_cache_aligned;

 /* Structure storing both primary and secondary hashes */
@@ -213,6 +216,9 @@ struct rte_hash_key {
        char key[0];
 } __attribute__((aligned(KEY_ALIGNMENT)));

+#define RTE_HASH_KEY_FLAG_UNMOVED 0
+#define RTE_HASH_KEY_FLAG_MOVED 1
+
 /** Bucket structure */
 struct rte_hash_bucket {
        struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES];
@@ -372,7 +378,7 @@ rte_hash_create(const struct rte_hash_parameters *params)

 /*
  * If x86 architecture is used, select appropriate compare function,
- * which may use x86 instrinsics, otherwise use memcmp
+ * which may use x86 intrinsics, otherwise use memcmp
  */
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
        /* Select function to compare keys */
@@ -431,7 +437,16 @@ rte_hash_create(const struct rte_hash_parameters *params)
        h->free_slots = r;
        h->hw_trans_mem_support = hw_trans_mem_support;

-       /* populate the free slots ring. Entry zero is reserved for key misses 
*/
+       /* Turn on multi-writer only with explicit flat from user and TM
+        * support.
+        */
+       if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD
+           && h->hw_trans_mem_support)
+               h->multiwrite_add = 1;
+       else
+               h->multiwrite_add = 0;
+
+       /* Populate free slots ring. Entry zero is reserved for key misses. */
        for (i = 1; i < params->entries + 1; i++)
                rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));

@@ -599,6 +614,131 @@ make_space_bucket(const struct rte_hash *h, struct 
rte_hash_bucket *bkt)

 }

+struct queue_node {
+       struct rte_hash_bucket *bkt;
+
+       struct queue_node *prev;
+       int prev_slot;
+};
+
+/* Shift buckets along cuckoo_path and fill the path head with new entry */
+static inline int
+tsx_cuckoo_move_insert(const struct rte_hash *h, struct queue_node *leaf,
+                      uint32_t leaf_slot, hash_sig_t sig,
+                      hash_sig_t alt_hash, uint32_t new_idx, uint8_t flag)
+{
+       #define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4
+       int32_t try = 0;
+       uint32_t prev_alt_bkt_idx;
+       unsigned status;
+
+       struct queue_node *prev_node, *curr_node = leaf;
+       struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
+       uint32_t prev_slot, curr_slot = leaf_slot;
+
+       int cuckoo_path_len = 0;
+
+       while (try < 10) {
+               status = rte_xbegin();
+               if (likely(status == RTE_XBEGIN_STARTED)) {
+                       while (likely(curr_node->prev != NULL)) {
+                               prev_node = curr_node->prev;
+                               prev_bkt = prev_node->bkt;
+                               prev_slot = curr_node->prev_slot;
+
+                               prev_alt_bkt_idx
+                                       = prev_bkt->signatures[prev_slot].alt
+                                       & h->bucket_bitmask;
+
+                               if (unlikely(&h->buckets[prev_alt_bkt_idx]
+                                            != curr_bkt)) {
+                                       
rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
+                               }
+
+                               curr_bkt->signatures[curr_slot]
+                                       = prev_bkt->signatures[prev_slot];
+                               curr_bkt->key_idx[curr_slot]
+                                       = prev_bkt->key_idx[prev_slot];
+                               curr_bkt->flag[curr_slot]
+                                       = RTE_HASH_KEY_FLAG_MOVED;
+
+                               curr_slot = prev_slot;
+                               curr_node = prev_node;
+                               curr_bkt = curr_node->bkt;
+
+                               cuckoo_path_len++;
+                       }
+
+                       curr_bkt->signatures[curr_slot].current = sig;
+                       curr_bkt->signatures[curr_slot].alt = alt_hash;
+                       curr_bkt->key_idx[curr_slot] = new_idx;
+                       curr_bkt->flag[curr_slot] = flag;
+
+                       rte_xend();
+
+                       return 0;
+               }
+
+               /* If we abort we give up this cuckoo path. */
+               try++;
+               rte_pause();
+               continue;
+       }
+
+       return -1;
+}
+
+/* Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
+ * Cuckoo
+ */
+static inline int
+make_space_insert_bfs_mw(const struct rte_hash *h, struct rte_hash_bucket *bkt,
+                        hash_sig_t sig, hash_sig_t alt_hash,
+                        uint32_t new_idx, uint8_t flag)
+{
+       int i;
+       struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
+       struct queue_node *tail, *head;
+       struct rte_hash_bucket *curr_bkt, *alt_bkt;
+
+       tail = queue;
+       head = queue + 1;
+       tail->bkt = bkt;
+       tail->prev = NULL;
+       tail->prev_slot = -1;
+
+       /* Cuckoo Search */
+       while (likely(tail != head && head <
+                     queue + RTE_HASH_BFS_QUEUE_MAX_LEN - 4)) {
+
+               curr_bkt = tail->bkt;
+               for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+                       if (curr_bkt->signatures[i].sig == NULL_SIGNATURE) {
+                               if (likely(tsx_cuckoo_move_insert(h, tail, i,
+                                       sig, alt_hash, new_idx, flag) == 0))
+                                       return 0;
+                       }
+
+                       /* Make sure current key's not already in its
+                        * secondary bucket.
+                        */
+                       if (curr_bkt->flag[i] == RTE_HASH_KEY_FLAG_UNMOVED) {
+                               /* Enqueue new node and keep prev node info */
+                               alt_bkt =
+                                       &(h->buckets[curr_bkt->signatures[i].alt
+                                                    & h->bucket_bitmask]);
+                               head->bkt = alt_bkt;
+                               head->prev = tail;
+                               head->prev_slot = i;
+                               head++;
+                       }
+               }
+               tail++;
+       }
+
+       return -ENOSPC;
+}
+
 /*
  * Function called to enqueue back an index in the cache/ring,
  * as slot has not being used and it can be used in the
@@ -712,30 +852,70 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, 
const void *key,
        rte_memcpy(new_k->key, key, h->key_len);
        new_k->pdata = data;

-       /* Insert new entry is there is room in the primary bucket */
-       for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               /* Check if slot is available */
-               if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) {
-                       prim_bkt->signatures[i].current = sig;
-                       prim_bkt->signatures[i].alt = alt_hash;
-                       prim_bkt->key_idx[i] = new_idx;
-                       return new_idx - 1;
+       unsigned status;
+       int32_t try = 0;
+
+       while (try < 10) {
+               status = rte_xbegin();
+               if (likely(status == RTE_XBEGIN_STARTED)) {
+                       /* Insert new entry is there is room in the primary
+                        * bucket.
+                        */
+                       for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+                               /* Check if slot is available */
+                               if (likely(prim_bkt->signatures[i].sig
+                                          == NULL_SIGNATURE)) {
+                                       prim_bkt->signatures[i].current = sig;
+                                       prim_bkt->signatures[i].alt = alt_hash;
+                                       prim_bkt->key_idx[i] = new_idx;
+                                       prim_bkt->flag[i] =
+                                               RTE_HASH_KEY_FLAG_UNMOVED;
+                                       break;
+                               }
+                       }
+                       rte_xend();
+
+                       if (i != RTE_HASH_BUCKET_ENTRIES)
+                               return new_idx - 1;
+
+                       break;
+
+               } else {
+                       /* If we abort we give up this cuckoo path. */
+                       try++;
+                       rte_pause();
+                       continue;
                }
        }

        /* Primary bucket is full, so we need to make space for new entry */
-       ret = make_space_bucket(h, prim_bkt);
-       /*
-        * After recursive function.
-        * Insert the new entry in the position of the pushed entry
-        * if successful or return error and
-        * store the new slot back in the ring
-        */
-       if (ret >= 0) {
-               prim_bkt->signatures[ret].current = sig;
-               prim_bkt->signatures[ret].alt = alt_hash;
-               prim_bkt->key_idx[ret] = new_idx;
-               return new_idx - 1;
+       if (h->multiwrite_add) {
+               ret = make_space_insert_bfs_mw(h, prim_bkt, sig,
+                       alt_hash, new_idx, RTE_HASH_KEY_FLAG_UNMOVED);
+
+               if (ret >= 0)
+                       return new_idx - 1;
+
+               ret = make_space_insert_bfs_mw(h, sec_bkt, sig,
+                       alt_hash, new_idx, RTE_HASH_KEY_FLAG_MOVED);
+
+               if (ret >= 0)
+                       return new_idx - 1;
+
+       } else {
+               /*
+                * After recursive function.
+                * Insert the new entry in the position of the pushed entry
+                * if successful or return error and
+                * store the new slot back in the ring
+                */
+               ret = make_space_bucket(h, prim_bkt);
+               if (ret >= 0) {
+                       prim_bkt->signatures[ret].current = sig;
+                       prim_bkt->signatures[ret].alt = alt_hash;
+                       prim_bkt->key_idx[ret] = new_idx;
+                       return new_idx - 1;
+               }
        }

        /* Error in addition, store new slot back in the ring and return error 
*/
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 724315a..c9612fb 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -60,6 +60,9 @@ extern "C" {
 /** Enable Hardware transactional memory support. */
 #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT 0x01

+/** Default behavior of insertion, single writer/multi writer */
+#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
+
 /** Signature of key that is stored internally. */
 typedef uint32_t hash_sig_t;

-- 
2.5.0

Reply via email to