Re: [PATCH v7 1/6] lib/dlock-list: Distributed and lock-protected lists

2017-10-18 Thread Boqun Feng
On Thu, Oct 05, 2017 at 06:43:23PM +, Waiman Long wrote:
[...]
> +/*
> + * Find the first entry of the next available list.
> + */
> +extern struct dlock_list_node *
> +__dlock_list_next_list(struct dlock_list_iter *iter);
> +
> +/**
> + * __dlock_list_next_entry - Iterate to the next entry of the dlock list
> + * @curr : Pointer to the current dlock_list_node structure
> + * @iter : Pointer to the dlock list iterator structure
> + * Return: Pointer to the next entry or NULL if all the entries are iterated
> + *
> + * The iterator has to be properly initialized before calling this function.
> + */
> +static inline struct dlock_list_node *
> +__dlock_list_next_entry(struct dlock_list_node *curr,
> + struct dlock_list_iter *iter)
> +{
> + /*
> +  * Find next entry
> +  */
> + if (curr)
> + curr = list_next_entry(curr, list);
> +
> +	if (!curr || (&curr->list == &iter->entry->list)) {
> + /*
> +  * The current list has been exhausted, try the next available
> +  * list.
> +  */
> + curr = __dlock_list_next_list(iter);
> + }
> +
> +	return curr;	/* Continue the iteration */
> +}
> +
> +/**
> + * dlock_list_first_entry - get the first element from a list
> + * @iter  : The dlock list iterator.
> + * @type  : The type of the struct this is embedded in.
> + * @member: The name of the dlock_list_node within the struct.
> + * Return : Pointer to the first entry or NULL if all the entries are iterated.
> + */
> +#define dlock_list_first_entry(iter, type, member)   \
> + ({  \
> + struct dlock_list_node *_n; \
> + _n = __dlock_list_next_entry(NULL, iter);   \
> + _n ? list_entry(_n, type, member) : NULL;   \
> + })
> +
> +/**
> + * dlock_list_next_entry - iterate to the next entry of the list
> + * @pos   : The type * to cursor
> + * @iter  : The dlock list iterator.
> + * @member: The name of the dlock_list_node within the struct.
> + * Return : Pointer to the next entry or NULL if all the entries are iterated.
> + *
> + * Note that pos can't be NULL.
> + */
> +#define dlock_list_next_entry(pos, iter, member) \
> + ({  \
> + struct dlock_list_node *_n; \
> + _n = __dlock_list_next_entry(&(pos)->member, iter); \
> + _n ? list_entry(_n, typeof(*(pos)), member) : NULL; \
> + })
> +

[...]

> +/**
> + * dlist_for_each_entry_safe - iterate over the dlock list & safe over removal
> + * @pos   : Type * to use as a loop cursor
> + * @n  : Another type * to use as temporary storage
> + * @iter  : The dlock list iterator
> + * @member: The name of the dlock_list_node within the struct
> + *
> + * This iteration macro is safe with respect to list entry removal.
> + * However, it cannot correctly iterate newly added entries right after the
> + * current one.
> + */
> +#define dlist_for_each_entry_safe(pos, n, iter, member)		\

So I missed something interesting here ;-)

> + for (pos = dlock_list_first_entry(iter, typeof(*(pos)), member);\
> + ({  \
> + bool _b = (pos != NULL);\
> + if (_b) \
> + n = dlock_list_next_entry(pos, iter, member);   \

If @pos is the last item of the per-index/cpu list, then
dlock_list_next_entry() will eventually call __dlock_list_next_list(),
which drops the lock for the current list and grabs the lock for the
next list, leaving @pos unprotected. In the meanwhile, another thread
could delete @pos via dlock_lists_del() and free it. This is a
use-after-free.

I think we can have something like the following (by adding a
->prev_entry to dlock_list_iter and several helper functions):

	bool dlist_is_last_perlist(struct dlock_list_node *n)
	{
		return list_is_last(&n->list, &n->head->list);
	}

	void dlock_list_release_prev(struct dlock_list_iter *iter)
	{
		spin_unlock(&iter->prev_entry->lock);
		iter->prev_entry = NULL;
	}

	#define dlist_for_each_entry_safe(pos, n, iter, member)		\
		for (pos = dlock_list_first_entry(iter, typeof(*(pos)), member);\
		     ({							\
			bool _b = (pos != NULL);			\
			if (_b) {					\
				if (dlist_is_last_perlist(&(pos)->member))

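The rest of the proposal is cut off in the archive. A minimal sketch of
how the macro might continue, assuming ->prev_entry starts out NULL and
that __dlock_list_next_list() is taught to skip unlocking the current
list whenever ->prev_entry is set, could be:

	#define dlist_for_each_entry_safe(pos, n, iter, member)		\
		for (pos = dlock_list_first_entry(iter, typeof(*(pos)), member);\
		     ({							\
			bool _b = (pos != NULL);			\
			if (_b) {					\
				/* keep this list locked across the hand-off */ \
				if (dlist_is_last_perlist(&(pos)->member)) \
					iter->prev_entry = iter->entry;	\
				n = dlock_list_next_entry(pos, iter, member); \
			}						\
			_b;						\
		     });						\
		     ({							\
			/* loop body is done with pos; drop the old lock */ \
			if (iter->prev_entry)				\
				dlock_list_release_prev(iter);		\
			pos = n;					\
		     }))

This way the lock protecting @pos's list is only dropped after the loop
body has finished with @pos, closing the window in which a concurrent
dlock_lists_del() could free it.
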
Re: [PATCH v7 1/6] lib/dlock-list: Distributed and lock-protected lists

2017-10-13 Thread Waiman Long
On 10/10/2017 01:35 AM, Boqun Feng wrote:
> On Thu, Oct 05, 2017 at 06:43:23PM +, Waiman Long wrote:
> [...]
>> +/*
>> + * As all the locks in the dlock list are dynamically allocated, they need
>> + * to belong to their own special lock class to avoid warning and stack
>> + * trace in kernel log when lockdep is enabled. Statically allocated locks
>> + * don't have this problem.
>> + */
>> +static struct lock_class_key dlock_list_key;
>> +
> So in this way, you make all dlock_lists share the same lock_class_key,
> which means if there are two structures:
>
>   struct some_a {
>   ...
>   struct dlock_list_heads dlists;
>   };
>
>   struct some_b {
>   ...
>   struct dlock_list_heads dlists;
>   };
>
> some_a::dlists and some_b::dlists are going to have the same lockdep
> key, is this what you want? If not, you may want to do something like
> init_srcu_struct() does.

I think it will be a problem only if a task acquires a lock in one
dlock-list and then acquires another lock from another dlock-list. The
way the dlock-list is used, no more than one lock will be acquired at
any time, so there won't be nested locking within the same dlock-list.
It is not a problem with the current use cases that Davidlohr and I
have, but it may become a problem if dlock-list gets more widely used.
I will take a look at how init_srcu_struct() does it, and maybe update
the patch accordingly. Thanks for the suggestion.
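
For reference, the init_srcu_struct()-style fix hides a static struct
lock_class_key behind a wrapper macro, so every allocation call site
gets its own lockdep class. A sketch (the __alloc_dlock_list_heads
helper and its key parameter are assumed names, not part of the posted
patch):

	#ifdef CONFIG_DEBUG_LOCK_ALLOC
	#define alloc_dlock_list_heads(dlist)				\
	({								\
		static struct lock_class_key _key;			\
		__alloc_dlock_list_heads(dlist, &_key);			\
	})
	#else
	#define alloc_dlock_list_heads(dlist)				\
		__alloc_dlock_list_heads(dlist, NULL)
	#endif

	int __alloc_dlock_list_heads(struct dlock_list_heads *dlist,
				     struct lock_class_key *key)
	{
		int idx;

		dlist->heads = kcalloc(nr_cpu_ids, sizeof(struct dlock_list_head),
				       GFP_KERNEL);
		if (!dlist->heads)
			return -ENOMEM;

		for (idx = 0; idx < nr_cpu_ids; idx++) {
			struct dlock_list_head *head = &dlist->heads[idx];

			INIT_LIST_HEAD(&head->list);
			head->lock = __SPIN_LOCK_UNLOCKED(&head->lock);
			/* one class per call site instead of one global key */
			lockdep_set_class(&head->lock, key);
		}
		return 0;
	}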

>
>> + * dlock_lists_del - Delete a node from a dlock list
>> + * @node : The node to be deleted
>> + *
>> + * We need to check the lock pointer again after taking the lock to guard
>> + * against concurrent deletion of the same node. If the lock pointer changes
>> + * (becomes NULL or to a different one), we assume that the deletion was done
>> + * elsewhere. A warning will be printed if this happens as it is likely to be
>> + * a bug.
>> + */
>> +void dlock_lists_del(struct dlock_list_node *node)
>> +{
>> +struct dlock_list_head *head;
>> +bool retry;
>> +
>> +do {
>> +head = READ_ONCE(node->head);
> Since we read node->head locklessly here, I think we should use
> WRITE_ONCE() for all the stores of node->head, to avoid store tearings?

Yes, you are right. I will use WRITE_ONCE() in my next version.
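
Concretely, the stores paired with that READ_ONCE() would become
WRITE_ONCE(); a sketch against the quoted code (the shape of the delete
path below is an assumption, since it is not fully shown here):

	void dlock_lists_add(struct dlock_list_node *node,
			     struct dlock_list_heads *dlist)
	{
		struct dlock_list_head *head = &dlist->heads[this_cpu_read(cpu2idx)];

		spin_lock(&head->lock);
		WRITE_ONCE(node->head, head);	/* pairs with READ_ONCE() in del */
		list_add(&node->list, &head->list);
		spin_unlock(&head->lock);
	}

	/* and in dlock_lists_del(), once the node is unlinked under head->lock: */
	list_del_init(&node->list);
	WRITE_ONCE(node->head, NULL);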

Cheers,
Longman



Re: [PATCH v7 1/6] lib/dlock-list: Distributed and lock-protected lists

2017-10-09 Thread Boqun Feng
On Thu, Oct 05, 2017 at 06:43:23PM +, Waiman Long wrote:
[...]
> +/*
> + * As all the locks in the dlock list are dynamically allocated, they need
> + * to belong to their own special lock class to avoid warning and stack
> + * trace in kernel log when lockdep is enabled. Statically allocated locks
> + * don't have this problem.
> + */
> +static struct lock_class_key dlock_list_key;
> +

So in this way, you make all dlock_lists share the same lock_class_key,
which means if there are two structures:

struct some_a {
...
struct dlock_list_heads dlists;
};

struct some_b {
...
struct dlock_list_heads dlists;
};

some_a::dlists and some_b::dlists are going to have the same lockdep
key, is this what you want? If not, you may want to do something like
init_srcu_struct() does.

> +/*
> + * Initialize cpu2idx mapping table
> + *
> + * It is possible that a dlock-list can be allocated before the cpu2idx is
> + * initialized. In this case, all the cpus are mapped to the first entry
> + * before initialization.
> + *
> + */
> +static int __init cpu2idx_init(void)
> +{
> + int idx, cpu;
> +
> + idx = 0;
> + for_each_possible_cpu(cpu)
> + per_cpu(cpu2idx, cpu) = idx++;
> + return 0;
> +}
> +postcore_initcall(cpu2idx_init);
> +
> +/**
> + * alloc_dlock_list_heads - Initialize and allocate the list of head entries
> + * @dlist: Pointer to the dlock_list_heads structure to be initialized
> + * Return: 0 if successful, -ENOMEM if memory allocation error
> + *
> + * This function does not allocate the dlock_list_heads structure itself. The
> + * callers will have to do their own memory allocation, if necessary. However,
> + * this allows embedding the dlock_list_heads structure directly into other
> + * structures.
> + */
> +int alloc_dlock_list_heads(struct dlock_list_heads *dlist)
> +{
> + int idx;
> +
> + dlist->heads = kcalloc(nr_cpu_ids, sizeof(struct dlock_list_head),
> +			       GFP_KERNEL);
> +
> + if (!dlist->heads)
> + return -ENOMEM;
> +
> + for (idx = 0; idx < nr_cpu_ids; idx++) {
> +		struct dlock_list_head *head = &dlist->heads[idx];
> +
> +		INIT_LIST_HEAD(&head->list);
> +		head->lock = __SPIN_LOCK_UNLOCKED(&head->lock);
> +		lockdep_set_class(&head->lock, &dlock_list_key);
> + }
> + return 0;
> +}
> +
> +/**
> + * free_dlock_list_heads - Free all the heads entries of the dlock list
> + * @dlist: Pointer of the dlock_list_heads structure to be freed
> + *
> + * This function doesn't free the dlock_list_heads structure itself. So
> + * the caller will have to do it, if necessary.
> + */
> +void free_dlock_list_heads(struct dlock_list_heads *dlist)
> +{
> + kfree(dlist->heads);
> + dlist->heads = NULL;
> +}
> +
> +/**
> + * dlock_lists_empty - Check if all the dlock lists are empty
> + * @dlist: Pointer to the dlock_list_heads structure
> + * Return: true if list is empty, false otherwise.
> + *
> + * This can be a pretty expensive function call. If this function is required
> + * in a performance critical path, we may have to maintain a global count
> + * of the list entries in the global dlock_list_heads structure instead.
> + */
> +bool dlock_lists_empty(struct dlock_list_heads *dlist)
> +{
> + int idx;
> +
> + for (idx = 0; idx < nr_cpu_ids; idx++)
> +		if (!list_empty(&dlist->heads[idx].list))
> + return false;
> + return true;
> +}
> +
> +/**
> + * dlock_lists_add - Adds a node to the given dlock list
> + * @node : The node to be added
> + * @dlist: The dlock list where the node is to be added
> + *
> + * List selection is based on the CPU being used when the dlock_list_add()
> + * function is called. However, deletion may be done by a different CPU.
> + */
> +void dlock_lists_add(struct dlock_list_node *node,
> +  struct dlock_list_heads *dlist)
> +{
> +	struct dlock_list_head *head = &dlist->heads[this_cpu_read(cpu2idx)];
> +
> + /*
> +  * There is no need to disable preemption
> +  */
> +	spin_lock(&head->lock);
> +	node->head = head;
> +	list_add(&node->list, &head->list);
> +	spin_unlock(&head->lock);
> +}
> +
> +/**
> + * dlock_lists_del - Delete a node from a dlock list
> + * @node : The node to be deleted
> + *
> + * We need to check the lock pointer again after taking the lock to guard
> + * against concurrent deletion of the same node. If the lock pointer changes
> + * (becomes NULL or to a different one), we assume that the deletion was done
> + * elsewhere. A warning will be printed if this happens as it is likely to be
> + * a bug.
> + */
> +void dlock_lists_del(struct dlock_list_node *node)
> +{
> + struct dlock_list_head *head;
> + bool retry;
> +
> + do {
> + head = READ_ONCE(node->head);

Since we read node->head locklessly here, I think we should use
WRITE_ONCE() for all the stores of node->head, to avoid store tearings?

[PATCH v7 1/6] lib/dlock-list: Distributed and lock-protected lists

2017-10-05 Thread Waiman Long
Linked list is used everywhere in the Linux kernel. However, if many
threads are trying to add or delete entries into the same linked list,
it can create a performance bottleneck.

This patch introduces new list APIs that provide a set of distributed
lists (one per CPU), each of which is protected by its own spinlock.
To the callers, however, the set of lists acts like a single
consolidated list.  This allows list entries insertion and deletion
operations to happen in parallel instead of being serialized with a
global list and lock.

List entry insertion is strictly per-CPU. List deletion, however, can
happen on a CPU other than the one that did the insertion. So we still
need a lock to protect the list. Because of that, there may still be
a small amount of contention when deletion is being done.

A new header file include/linux/dlock-list.h will be added with the
associated dlock_list_head and dlock_list_node structures. The following
functions are provided to manage the per-cpu list:

 1. int alloc_dlock_list_heads(struct dlock_list_heads *dlist)
 2. void free_dlock_list_heads(struct dlock_list_heads *dlist)
 3. void dlock_lists_add(struct dlock_list_node *node,
			 struct dlock_list_heads *dlist)
 4. void dlock_lists_del(struct dlock_list_node *node)

Iteration of all the list entries within a dlock list array
is done by calling either the dlist_for_each_entry() or
dlist_for_each_entry_safe() macros. They correspond to the
list_for_each_entry() and list_for_each_entry_safe() macros
respectively. The iteration states are kept in a dlock_list_iter
structure that is passed to the iteration macros.
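
For illustration, a caller embedding a dlock list might use these APIs
roughly as follows (a sketch; struct foo and its fields are made up):

	#include <linux/dlock-list.h>
	#include <linux/printk.h>

	struct foo {
		int data;
		struct dlock_list_node node;	/* links foo into the dlock list */
	};

	static struct dlock_list_heads foo_lists;

	static int foo_init(void)
	{
		/* allocates one lock-protected list head per possible CPU */
		return alloc_dlock_list_heads(&foo_lists);
	}

	static void foo_insert(struct foo *f)
	{
		init_dlock_list_node(&f->node);
		dlock_lists_add(&f->node, &foo_lists);	/* uses this CPU's list */
	}

	static void foo_print_all(void)
	{
		struct foo *f;
		DEFINE_DLOCK_LIST_ITER(iter, &foo_lists);

		/* walks every per-CPU list, taking one per-list lock at a time */
		dlist_for_each_entry(f, &iter, node)
			pr_info("data=%d\n", f->data);
	}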

Signed-off-by: Waiman Long 
Reviewed-by: Jan Kara 
---
 include/linux/dlock-list.h | 224 +++
 lib/Makefile   |   2 +-
 lib/dlock-list.c   | 231 +
 3 files changed, 456 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/dlock-list.h
 create mode 100644 lib/dlock-list.c

diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
new file mode 100644
index 000..7940e524
--- /dev/null
+++ b/include/linux/dlock-list.h
@@ -0,0 +1,224 @@
+/*
+ * Distributed and locked list
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2016 Hewlett-Packard Enterprise Development LP
+ * (C) Copyright 2017 Red Hat, Inc.
+ *
+ * Authors: Waiman Long 
+ */
+#ifndef __LINUX_DLOCK_LIST_H
+#define __LINUX_DLOCK_LIST_H
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+/*
+ * include/linux/dlock-list.h
+ *
+ * The dlock_list_head structure contains the spinlock. It is cacheline
+ * aligned to reduce contention among different CPUs. The other
+ * dlock_list_node structures contain a pointer to the head entry instead.
+ */
+struct dlock_list_head {
+   struct list_head list;
+   spinlock_t lock;
+} ____cacheline_aligned_in_smp;
+
+struct dlock_list_heads {
+   struct dlock_list_head *heads;
+};
+
+/*
+ * dlock list node data structure
+ */
+struct dlock_list_node {
+   struct list_head list;
+   struct dlock_list_head *head;
+};
+
+/*
+ * dlock list iteration state
+ *
+ * This is an opaque data structure that may change. Users of this structure
+ * should not access the structure members directly other than using the
+ * helper functions and macros provided in this header file.
+ */
+struct dlock_list_iter {
+   int index;
+   struct dlock_list_head *head, *entry;
+};
+
+#define DLOCK_LIST_ITER_INIT(dlist)\
+   {   \
+   .index = -1,\
+   .head = (dlist)->heads, \
+   }
+
+#define DEFINE_DLOCK_LIST_ITER(s, heads)   \
+   struct dlock_list_iter s = DLOCK_LIST_ITER_INIT(heads)
+
+static inline void init_dlock_list_iter(struct dlock_list_iter *iter,
+   struct dlock_list_heads *heads)
+{
+   *iter = (struct dlock_list_iter)DLOCK_LIST_ITER_INIT(heads);
+}
+
+#define DLOCK_LIST_NODE_INIT(name) \
+   {   \
+   .list = LIST_HEAD_INIT(name)\
+   }
+
+static inline void init_dlock_list_node(struct dlock_list_node *node)
+{
+   *node = (struct dlock_list_node)DLOCK_LIST_NODE_INIT(node->list);
+}
+
+/**
+ * dlock_list_unlock - unlock the spinlock that protects the current list
+ * @iter: Pointer to the dlock list iterator structure
+ */
+static inline void dlock_list_unlock(struct dlock_list_iter *iter)
+{
+	spin_unlock(&iter->entry->lock);
+}
