From: "De Lara Guarch, Pablo"
When using multiple cores on a system with hardware transactional
memory support, thread scaling does not work, because there is a single
point in the hash library that is a bottleneck for all threads:
the "free_slots" ring, which stores the indices of all
the free slots in the table.
This patch fixes the problem by creating a local cache per logical core,
which stores indices of free slots locally,
so most of the time writer threads will not interfere with each other.
Fixes: 48a399119619 ("hash: replace with cuckoo hash implementation")
Signed-off-by: Pablo de Lara
---
Changes in v3:
- Renamed new extra option in hash parameters
- Commented and renamed enqueue_slot function for better understanding
Changes in v2:
- Included patch dependency below
This patch depends on patch "hash: free internal ring when freeing hash"
(http://www.dpdk.org/dev/patchwork/patch/7377/)
app/test/test_hash_scaling.c | 1 +
doc/guides/rel_notes/release_2_2.rst | 5 ++
lib/librte_hash/rte_cuckoo_hash.c| 150 ++-
lib/librte_hash/rte_hash.h | 3 +
4 files changed, 139 insertions(+), 20 deletions(-)
diff --git a/app/test/test_hash_scaling.c b/app/test/test_hash_scaling.c
index 39602cb..744e5e3 100644
--- a/app/test/test_hash_scaling.c
+++ b/app/test/test_hash_scaling.c
@@ -133,6 +133,7 @@ test_hash_scaling(int locking_mode)
.hash_func = rte_hash_crc,
.hash_func_init_val = 0,
.socket_id = rte_socket_id(),
+ .extra_flag = RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
};
struct rte_hash *handle;
char name[RTE_HASH_NAMESIZE];
diff --git a/doc/guides/rel_notes/release_2_2.rst
b/doc/guides/rel_notes/release_2_2.rst
index 89e4d58..14d2ed9 100644
--- a/doc/guides/rel_notes/release_2_2.rst
+++ b/doc/guides/rel_notes/release_2_2.rst
@@ -79,6 +79,11 @@ Drivers
Fixed issue when releasing null control queue.
+* **hash: Fixed thread scaling by reducing contention.**
+
+ Fixed issue in hash library where, using multiple cores with
+ hardware transactional memory support, thread scaling did not work,
+ due to the global ring that is shared by all cores.
Libraries
~
diff --git a/lib/librte_hash/rte_cuckoo_hash.c
b/lib/librte_hash/rte_cuckoo_hash.c
index 409fc2e..f797aeb 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -96,8 +96,15 @@ EAL_REGISTER_TAILQ(rte_hash_tailq)
#define KEY_ALIGNMENT 16
+#define LCORE_CACHE_SIZE 8
+
typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t
key_len);
+struct lcore_cache {
+ unsigned len; /**< Cache len */
+ void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */
+} __rte_cache_aligned;
+
/** A hash table structure. */
struct rte_hash {
char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */
@@ -117,6 +124,10 @@ struct rte_hash {
struct rte_hash_bucket *buckets;/**< Table with buckets storing
all the
hash values and key
indexes
to the key table*/
+ uint8_t hw_trans_mem_support; /**< Hardware transactional
+ memory support */
+ struct lcore_cache *local_free_slots;
+ /**< Local cache per lcore, storing some indexes of the free slots */
} __rte_cache_aligned;
/* Structure storing both primary and secondary hashes */
@@ -183,6 +194,8 @@ rte_hash_create(const struct rte_hash_parameters *params)
void *k = NULL;
void *buckets = NULL;
char ring_name[RTE_RING_NAMESIZE];
+ unsigned num_key_slots;
+ unsigned hw_trans_mem_support = 0;
unsigned i;
hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
@@ -202,6 +215,10 @@ rte_hash_create(const struct rte_hash_parameters *params)
return NULL;
}
+ /* Check extra flags field to check extra options. */
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
+ hw_trans_mem_support = 1;
+
snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
/* Guarantee there's no existing */
@@ -238,7 +255,18 @@ rte_hash_create(const struct rte_hash_parameters *params)
const uint32_t key_entry_size = sizeof(struct rte_hash_key) +
params->key_len;
/* Store all keys and leave the first entry as a dummy entry for
lookup_bulk */
- const uint64_t key_tbl_size = (uint64_t) key_entry_size *
(params->entries + 1);
+ if (hw_trans_mem_support)
+ /*
+* Increase number of slots by total number of indices
+* that can be stored in the lcore caches
+* except for the first cache
+*/
+ num_key_slots