* Lai Jiangshan ([email protected]) wrote:
> The old memory management scheme is named LFHT_MEMORY_ORDER.
> 
> Signed-off-by: Lai Jiangshan <[email protected]>
> ---
>  rculfhash.c |  116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 files changed, 111 insertions(+), 5 deletions(-)
> 
> diff --git a/rculfhash.c b/rculfhash.c
> index b7be893..ae1177f 100644
> --- a/rculfhash.c
> +++ b/rculfhash.c
> @@ -172,6 +172,10 @@
>  #define dbg_printf(fmt, args...)
>  #endif
>  
> +#if !defined(LFHT_MEMORY_ORDER) && !defined(LFHT_MEMORY_CHUNK)
> +#define LFHT_MEMORY_ORDER
> +#endif
> +
>  /*
>   * Split-counters lazily update the global counter each 1024
>   * addition/removal. It automatically keeps track of resize required.
> @@ -195,6 +199,8 @@
>  #define MAX_TABLE_ORDER                      64
>  #endif
>  
> +#define MAX_CHUNK_TABLE                      (1UL << 10)
> +
>  /*
>   * Minimum number of bucket nodes to touch per thread to parallelize grow/shrink.
>   */
> @@ -247,6 +253,7 @@ struct rcu_table {
>       unsigned long resize_target;
>       int resize_initiated;
>  
> +#ifdef LFHT_MEMORY_ORDER
>       /*
>        * Contains the per order-index-level bucket node table. The size
>        * of each bucket node table is half the number of hashes contained
> @@ -255,6 +262,7 @@ struct rcu_table {
>        * levels to improve cache locality for small index orders.
>        */
>       struct cds_lfht_node *tbl[MAX_TABLE_ORDER];
> +#endif
>  };
>  
>  /*
> @@ -289,6 +297,16 @@ struct cds_lfht {
>       pthread_attr_t *resize_attr;    /* Resize threads attributes */
>       long count;                     /* global approximate item count */
>       struct ht_items_count *split_count;     /* split item count */
> +
> +#ifdef LFHT_MEMORY_CHUNK
> +     /*
> +      * Contains the bucket node chunks. The size of each bucket node
> +      * chunk is ->min_alloc_size (this avoids allocating chunks of
> +      * different sizes). Chunks improve cache locality for small index
> +      * orders and speed up lookup_bucket() on archs without hardware fls().
> +      */
> +     struct cds_lfht_node *tbl[0];
> +#endif
>  };
>  
>  /*
> @@ -753,6 +771,93 @@ unsigned long _uatomic_xchg_monotonic_increase(unsigned long *ptr,
>       return old2;
>  }
>  
> +#ifdef LFHT_MEMORY_CHUNK
> +static
> +struct cds_lfht *_cds_lfht_alloc(unsigned long min_alloc_size,
> +             unsigned long max_size)
> +{
> +     struct cds_lfht *ht;
> +
> +     min_alloc_size = max(min_alloc_size, max_size / MAX_CHUNK_TABLE);
> +     ht = calloc(1, sizeof(struct cds_lfht) +
> +                     sizeof(ht->tbl[0]) * (max_size / min_alloc_size));

Hrm, this first level of indirection quickly becomes much larger than
the order-based table. Have you measured gains with this approach?
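
To put numbers on it: assuming a 64-bit build, max_size = 2^20, and a
requested min_alloc_size below 1024, min_alloc_size gets clamped to
max(min_alloc_size, max_size / MAX_CHUNK_TABLE) = 1024, so the chunk
pointer array ends up with max_size / min_alloc_size = 1024 pointers
(8 KB), whereas the order-based t.tbl[] stays at a fixed
MAX_TABLE_ORDER = 64 pointers (512 bytes).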

Can you detail the use-case for this approach?

Have you measured the overhead of software fls() compared to the
overhead of the increased cache footprint of the larger table in the
chunk-based approach?
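
To make that last question concrete, here is a minimal standalone sketch
(not the rculfhash code; the helper names are made up for illustration)
of the per-lookup index arithmetic each scheme ends up doing. The
order-based table needs an fls() per bucket lookup, which degrades to a
bit-scanning loop on archs without a hardware instruction, while the
chunk-based table only needs a shift and a mask:

#include <stdio.h>

/* Software fls fallback: position of the most significant set bit, 1-based. */
static unsigned long soft_fls_ulong(unsigned long x)
{
        unsigned long r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

/*
 * Order-based scheme: bucket "index" lives in table level "order", at
 * offset index & ((1UL << (order - 1)) - 1). Every lookup pays for one
 * fls (hardware where available, a loop like the one above otherwise).
 */
static void order_locate(unsigned long index,
                unsigned long *order, unsigned long *offset)
{
        if (index == 0) {
                *order = 0;
                *offset = 0;
                return;
        }
        *order = soft_fls_ulong(index);
        *offset = index & ((1UL << (*order - 1)) - 1);
}

/*
 * Chunk-based scheme: all chunks share the same power-of-two size
 * (2^chunk_order), so locating a bucket is a shift plus a mask.
 */
static void chunk_locate(unsigned long index, unsigned long chunk_order,
                unsigned long *chunk, unsigned long *offset)
{
        *chunk = index >> chunk_order;
        *offset = index & ((1UL << chunk_order) - 1);
}

int main(void)
{
        unsigned long level, offset;

        order_locate(3000, &level, &offset);            /* level 12, offset 952 */
        printf("order scheme: level %lu offset %lu\n", level, offset);
        chunk_locate(3000, 10, &level, &offset);        /* chunk 2, offset 952 */
        printf("chunk scheme: chunk %lu offset %lu\n", level, offset);
        return 0;
}

On archs with a cheap hardware fls/clz the two are roughly comparable;
the per-lookup difference mainly shows up where fls has to be emulated
in software, which is what the question above is about.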

Thanks,

Mathieu

> +     assert(ht);
> +
> +     ht->min_alloc_size = min_alloc_size;
> +     ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
> +     ht->max_size = max_size;
> +
> +     return ht;
> +}
> +
> +static
> +void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
> +{
> +     if (order == 0) {
> +             ht->tbl[0] = calloc(ht->min_alloc_size,
> +                     sizeof(struct cds_lfht_node));
> +             assert(ht->tbl[0]);
> +     } else if (order > ht->min_alloc_order) {
> +             unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_order);
> +
> +             for (i = len; i < 2 * len; i++) {
> +                     ht->tbl[i] = calloc(ht->min_alloc_size,
> +                             sizeof(struct cds_lfht_node));
> +                     assert(ht->tbl[i]);
> +             }
> +     }
> +     /* Nothing to do for 0 < order && order <= ht->min_alloc_order */
> +}
> +
> +/*
> + * cds_lfht_free_bucket_table() should be called with decreasing order.
> + * When cds_lfht_free_bucket_table(0) is called, it means the whole
> + * lfht is destroyed.
> + */
> +static
> +void cds_lfht_free_bucket_table(struct cds_lfht *ht, unsigned long order)
> +{
> +     if (order == 0)
> +             poison_free(ht->tbl[0]);
> +     else if (order > ht->min_alloc_order) {
> +             unsigned long i, len = 1UL << (order - 1 - ht->min_alloc_order);
> +
> +             for (i = len; i < 2 * len; i++)
> +                     poison_free(ht->tbl[i]);
> +     }
> +     /* Nothing to do for 0 < order && order <= ht->min_alloc_order */
> +}
> +
> +static inline
> +struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
> +{
> +     unsigned long chunk, offset;
> +
> +     if (__builtin_constant_p(index) && index == 0)
> +             return &ht->tbl[0][0];
> +
> +     chunk = index >> ht->min_alloc_order;
> +     offset = index & (ht->min_alloc_size - 1);
> +     return &ht->tbl[chunk][offset];
> +}
> +#else
> +/* LFHT_MEMORY_ORDER */
> +static
> +struct cds_lfht *_cds_lfht_alloc(unsigned long min_alloc_size,
> +             unsigned long max_size)
> +{
> +     struct cds_lfht *ht;
> +
> +     ht = calloc(1, sizeof(struct cds_lfht));
> +     assert(ht);
> +
> +     ht->min_alloc_size = min_alloc_size;
> +     ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
> +     ht->max_size = max_size;
> +
> +     return ht;
> +}
> +
>  static
>  void cds_lfht_alloc_bucket_table(struct cds_lfht *ht, unsigned long order)
>  {
> @@ -803,6 +908,7 @@ struct cds_lfht_node *bucket_at(struct cds_lfht *ht, unsigned long index)
>                  index, order, index & ((1UL << (order - 1)) - 1));
>       return &ht->t.tbl[order][index & ((1UL << (order - 1)) - 1)];
>  }
> +#endif
>  
>  static inline
>  struct cds_lfht_node *lookup_bucket(struct cds_lfht *ht, unsigned long size,
> @@ -1376,8 +1482,11 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
>       if (!init_size || (init_size & (init_size - 1)))
>               return NULL;
>  
> +#ifdef LFHT_MEMORY_ORDER
> +     /* For LFHT_MEMORY_ORDER, max_size == 0 means the largest supported size. */
>       if (!max_size)
>               max_size = 1UL << (MAX_TABLE_ORDER - 1);
> +#endif
>  
>       /* max_size must be power of two */
>       if (!max_size || (max_size & (max_size - 1)))
> @@ -1387,8 +1496,8 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
>       init_size = max(init_size, MIN_TABLE_SIZE);
>       max_size = max(max_size, min_alloc_size);
>       init_size = min(init_size, max_size);
> -     ht = calloc(1, sizeof(struct cds_lfht));
> -     assert(ht);
> +
> +     ht = _cds_lfht_alloc(min_alloc_size, max_size);
>       ht->flags = flags;
>       ht->cds_lfht_call_rcu = cds_lfht_call_rcu;
>       ht->cds_lfht_synchronize_rcu = cds_lfht_synchronize_rcu;
> @@ -1404,9 +1513,6 @@ struct cds_lfht *_cds_lfht_new(unsigned long init_size,
>       pthread_mutex_init(&ht->resize_mutex, NULL);
>       order = get_count_order_ulong(init_size);
>       ht->t.resize_target = 1UL << order;
> -     ht->min_alloc_size = min_alloc_size;
> -     ht->min_alloc_order = get_count_order_ulong(min_alloc_size);
> -     ht->max_size = max_size;
>       cds_lfht_create_bucket(ht, 1UL << order);
>       ht->t.size = 1UL << order;
>       return ht;
> -- 
> 1.7.4.4
> 

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
