from:"Sasha Levin"

[PATCH v8 01/16] hashtable: introduce a small and naive hashtable

2012-10-30 Thread Sasha Levin

This hashtable implementation is using hlist buckets to provide a simple
hashtable to prevent it from getting reimplemented all over the kernel.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---

Changes from v8:

 - Addressed comments from Tejun Heo and Mathieu Desnoyers.


 include/linux/hashtable.h | 196 ++
 1 file changed, 196 insertions(+)
 create mode 100644 include/linux/hashtable.h

diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
new file mode 100644
index 000..3c1a9cb
--- /dev/null
+++ b/include/linux/hashtable.h
@@ -0,0 +1,196 @@
+/*
+ * Statically sized hash table implementation
+ * (C) 2012  Sasha Levin levinsasha...@gmail.com
+ */
+
+#ifndef _LINUX_HASHTABLE_H
+#define _LINUX_HASHTABLE_H
+
+#include linux/list.h
+#include linux/types.h
+#include linux/kernel.h
+#include linux/hash.h
+#include linux/rculist.h
+
+#define DEFINE_HASHTABLE(name, bits)   
\
+   struct hlist_head name[1  (bits)] =   
\
+   { [0 ... ((1  (bits)) - 1)] = HLIST_HEAD_INIT }
+
+#define DECLARE_HASHTABLE(name, bits)  
\
+   struct hlist_head name[1  (bits)]
+
+#define HASH_SIZE(name) (ARRAY_SIZE(name))
+#define HASH_BITS(name) ilog2(HASH_SIZE(name))
+
+/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */
+#define hash_min(val, bits)
\
+({ 
\
+   sizeof(val) = 4 ?  
\
+   hash_32(val, bits) :
\
+   hash_long(val, bits);   
\
+})
+
+static inline void __hash_init(struct hlist_head *ht, unsigned int sz)
+{
+   unsigned int i;
+
+   for (i = 0; i  sz; i++)
+   INIT_HLIST_HEAD(ht[i]);
+}
+
+/**
+ * hash_init - initialize a hash table
+ * @hashtable: hashtable to be initialized
+ *
+ * Calculates the size of the hashtable from the given parameter and passes
+ * it to __hash_init().
+ *
+ * This has to be a macro since HASH_SIZE() will not work on pointers: it
+ * derives the number of buckets from the array type at compile time.
+ */
+#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_add - add an object to a hashtable
+ * @hashtable: hashtable to add to
+ * @node: the struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add(hashtable, node, key) 
\
+   hlist_add_head(node, hashtable[hash_min(key, HASH_BITS(hashtable))])
+
+/**
+ * hash_add_rcu - add an object to a rcu enabled hashtable
+ * @hashtable: hashtable to add to
+ * @node: the struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add_rcu(hashtable, node, key) 
\
+   hlist_add_head_rcu(node, hashtable[hash_min(key, 
HASH_BITS(hashtable))])
+
+/**
+ * hash_hashed - check whether an object is in any hashtable
+ * @node: the struct hlist_node of the object to be checked
+ */
+static inline bool hash_hashed(struct hlist_node *node)
+{
+   return !hlist_unhashed(node);
+}
+
+static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz)
+{
+   unsigned int i;
+
+   for (i = 0; i  sz; i++)
+   if (!hlist_empty(ht[i]))
+   return false;
+
+   return true;
+}
+
+/**
+ * hash_empty - check whether a hashtable is empty
+ * @hashtable: hashtable to check
+ *
+ * This has to be a macro since HASH_SIZE() will not work on pointers: it
+ * derives the number of buckets from the array type at compile time.
+ */
+#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_del - remove an object from a hashtable
+ * @node: struct hlist_node of the object to remove
+ */
+static inline void hash_del(struct hlist_node *node)
+{
+   hlist_del_init(node);
+}
+
+/**
+ * hash_del_rcu - remove an object from a rcu enabled hashtable
+ * @node: struct hlist_node of the object to remove
+ */
+static inline void hash_del_rcu(struct hlist_node *node)
+{
+   hlist_del_init_rcu(node);
+}
+
+/**
+ * hash_for_each - iterate over a hashtable
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @node: the struct hlist_node to use as a loop cursor for each entry
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each(name, bkt, node, obj, member)
\
+   for ((bkt) = 0, node = NULL; node == NULL  (bkt)  HASH_SIZE(name); 
(bkt)++)\
+   hlist_for_each_entry(obj, node, name[bkt], member)
+
+/**
+ * hash_for_each_rcu

[PATCH v8 03/16] mm,ksm: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch ksm to use the new hashtable implementation. This reduces the amount of
generic unrelated code in the ksm module.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 mm/ksm.c | 31 +--
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 31ae5ea..751e328 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -33,7 +33,7 @@
 #include linux/mmu_notifier.h
 #include linux/swap.h
 #include linux/ksm.h
-#include linux/hash.h
+#include linux/hashtable.h
 #include linux/freezer.h
 #include linux/oom.h
 
@@ -156,9 +156,8 @@ struct rmap_item {
 static struct rb_root root_stable_tree = RB_ROOT;
 static struct rb_root root_unstable_tree = RB_ROOT;
 
-#define MM_SLOTS_HASH_SHIFT 10
-#define MM_SLOTS_HASH_HEADS (1  MM_SLOTS_HASH_SHIFT)
-static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];
+#define MM_SLOTS_HASH_BITS 10
+static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
 static struct mm_slot ksm_mm_head = {
.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
@@ -275,26 +274,21 @@ static inline void free_mm_slot(struct mm_slot *mm_slot)
 
 static struct mm_slot *get_mm_slot(struct mm_struct *mm)
 {
-   struct mm_slot *mm_slot;
-   struct hlist_head *bucket;
struct hlist_node *node;
+   struct mm_slot *slot;
+
+   hash_for_each_possible(mm_slots_hash, slot, node, link, (unsigned 
long)mm) 
+   if (slot-mm == mm)
+   return slot;
 
-   bucket = mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
-   hlist_for_each_entry(mm_slot, node, bucket, link) {
-   if (mm == mm_slot-mm)
-   return mm_slot;
-   }
return NULL;
 }
 
 static void insert_to_mm_slots_hash(struct mm_struct *mm,
struct mm_slot *mm_slot)
 {
-   struct hlist_head *bucket;
-
-   bucket = mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
mm_slot-mm = mm;
-   hlist_add_head(mm_slot-link, bucket);
+   hash_add(mm_slots_hash, mm_slot-link, (unsigned long)mm);
 }
 
 static inline int in_stable_tree(struct rmap_item *rmap_item)
@@ -647,7 +641,7 @@ static int unmerge_and_remove_all_rmap_items(void)
ksm_scan.mm_slot = list_entry(mm_slot-mm_list.next,
struct mm_slot, mm_list);
if (ksm_test_exit(mm)) {
-   hlist_del(mm_slot-link);
+   hash_del(mm_slot-link);
list_del(mm_slot-mm_list);
spin_unlock(ksm_mmlist_lock);
 
@@ -1392,7 +1386,7 @@ next_mm:
 * or when all VM_MERGEABLE areas have been unmapped (and
 * mmap_sem then protects against race with MADV_MERGEABLE).
 */
-   hlist_del(slot-link);
+   hash_del(slot-link);
list_del(slot-mm_list);
spin_unlock(ksm_mmlist_lock);
 
@@ -1559,7 +1553,7 @@ void __ksm_exit(struct mm_struct *mm)
mm_slot = get_mm_slot(mm);
if (mm_slot  ksm_scan.mm_slot != mm_slot) {
if (!mm_slot-rmap_list) {
-   hlist_del(mm_slot-link);
+   hash_del(mm_slot-link);
list_del(mm_slot-mm_list);
easy_to_free = 1;
} else {
@@ -2038,6 +2032,7 @@ static int __init ksm_init(void)
 */
hotplug_memory_notifier(ksm_memory_callback, 100);
 #endif
+
return 0;
 
 out_free:
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 04/16] workqueue: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch workqueues to use the new hashtable implementation. This reduces the
amount of generic unrelated code in the workqueues.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 kernel/workqueue.c | 86 ++
 1 file changed, 15 insertions(+), 71 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a1135c6..8f6e1bf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,6 +41,7 @@
 #include linux/debug_locks.h
 #include linux/lockdep.h
 #include linux/idr.h
+#include linux/hashtable.h
 
 #include workqueue_sched.h
 
@@ -82,8 +83,6 @@ enum {
NR_WORKER_POOLS = 2,/* # worker pools per gcwq */
 
BUSY_WORKER_HASH_ORDER  = 6,/* 64 pointers */
-   BUSY_WORKER_HASH_SIZE   = 1  BUSY_WORKER_HASH_ORDER,
-   BUSY_WORKER_HASH_MASK   = BUSY_WORKER_HASH_SIZE - 1,
 
MAX_IDLE_WORKERS_RATIO  = 4,/* 1/4 of busy can be idle */
IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
@@ -180,7 +179,7 @@ struct global_cwq {
unsigned intflags;  /* L: GCWQ_* flags */
 
/* workers are chained either in busy_hash or pool idle_list */
-   struct hlist_head   busy_hash[BUSY_WORKER_HASH_SIZE];
+   DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
/* L: hash of busy workers */
 
struct worker_pool  pools[NR_WORKER_POOLS];
@@ -285,8 +284,7 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
 (pool)  (gcwq)-pools[NR_WORKER_POOLS]; (pool)++)
 
 #define for_each_busy_worker(worker, i, pos, gcwq) \
-   for (i = 0; i  BUSY_WORKER_HASH_SIZE; i++) \
-   hlist_for_each_entry(worker, pos, gcwq-busy_hash[i], hentry)
+   hash_for_each(gcwq-busy_hash, i, pos, worker, hentry)
 
 static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
  unsigned int sw)
@@ -857,63 +855,6 @@ static inline void worker_clr_flags(struct worker *worker, 
unsigned int flags)
 }
 
 /**
- * busy_worker_head - return the busy hash head for a work
- * @gcwq: gcwq of interest
- * @work: work to be hashed
- *
- * Return hash head of @gcwq for @work.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq-lock).
- *
- * RETURNS:
- * Pointer to the hash head.
- */
-static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
-  struct work_struct *work)
-{
-   const int base_shift = ilog2(sizeof(struct work_struct));
-   unsigned long v = (unsigned long)work;
-
-   /* simple shift and fold hash, do we need something better? */
-   v = base_shift;
-   v += v  BUSY_WORKER_HASH_ORDER;
-   v = BUSY_WORKER_HASH_MASK;
-
-   return gcwq-busy_hash[v];
-}
-
-/**
- * __find_worker_executing_work - find worker which is executing a work
- * @gcwq: gcwq of interest
- * @bwh: hash head as returned by busy_worker_head()
- * @work: work to find worker for
- *
- * Find a worker which is executing @work on @gcwq.  @bwh should be
- * the hash head obtained by calling busy_worker_head() with the same
- * work.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq-lock).
- *
- * RETURNS:
- * Pointer to worker which is executing @work if found, NULL
- * otherwise.
- */
-static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
-  struct hlist_head *bwh,
-  struct work_struct *work)
-{
-   struct worker *worker;
-   struct hlist_node *tmp;
-
-   hlist_for_each_entry(worker, tmp, bwh, hentry)
-   if (worker-current_work == work)
-   return worker;
-   return NULL;
-}
-
-/**
  * find_worker_executing_work - find worker which is executing a work
  * @gcwq: gcwq of interest
  * @work: work to find worker for
@@ -932,8 +873,14 @@ static struct worker *__find_worker_executing_work(struct 
global_cwq *gcwq,
 static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
 struct work_struct *work)
 {
-   return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
-   work);
+   struct worker *worker;
+   struct hlist_node *tmp;
+
+   hash_for_each_possible(gcwq-busy_hash, worker, tmp, hentry, (unsigned 
long)work)
+   if (worker-current_work == work)
+   return worker;
+
+   return NULL;
 }
 
 /**
@@ -2160,7 +2107,6 @@ __acquires(gcwq-lock)
struct cpu_workqueue_struct *cwq = get_work_cwq(work);
struct worker_pool *pool = worker-pool;
struct global_cwq *gcwq = pool-gcwq;
-   struct hlist_head *bwh = busy_worker_head(gcwq, work);
bool cpu_intensive = cwq-wq-flags  WQ_CPU_INTENSIVE;
work_func_t f = work-func

[PATCH v8 06/16] tracepoint: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch tracepoints to use the new hashtable implementation. This reduces the
amount of generic unrelated code in the tracepoints.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 kernel/tracepoint.c | 25 +
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index d96ba22..5b599f1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -26,6 +26,7 @@
 #include linux/slab.h
 #include linux/sched.h
 #include linux/static_key.h
+#include linux/hashtable.h
 
 extern struct tracepoint * const __start___tracepoints_ptrs[];
 extern struct tracepoint * const __stop___tracepoints_ptrs[];
@@ -49,8 +50,7 @@ static LIST_HEAD(tracepoint_module_list);
  * Protected by tracepoints_mutex.
  */
 #define TRACEPOINT_HASH_BITS 6
-#define TRACEPOINT_TABLE_SIZE (1  TRACEPOINT_HASH_BITS)
-static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+static DEFINE_HASHTABLE(tracepoint_table, TRACEPOINT_HASH_BITS);
 
 /*
  * Note about RCU :
@@ -191,16 +191,15 @@ tracepoint_entry_remove_probe(struct tracepoint_entry 
*entry,
  */
 static struct tracepoint_entry *get_tracepoint(const char *name)
 {
-   struct hlist_head *head;
struct hlist_node *node;
struct tracepoint_entry *e;
u32 hash = jhash(name, strlen(name), 0);
 
-   head = tracepoint_table[hash  (TRACEPOINT_TABLE_SIZE - 1)];
-   hlist_for_each_entry(e, node, head, hlist) {
+   hash_for_each_possible(tracepoint_table, e, node, hlist, hash) {
if (!strcmp(name, e-name))
return e;
}
+
return NULL;
 }
 
@@ -210,19 +209,13 @@ static struct tracepoint_entry *get_tracepoint(const char 
*name)
  */
 static struct tracepoint_entry *add_tracepoint(const char *name)
 {
-   struct hlist_head *head;
-   struct hlist_node *node;
struct tracepoint_entry *e;
size_t name_len = strlen(name) + 1;
u32 hash = jhash(name, name_len-1, 0);
 
-   head = tracepoint_table[hash  (TRACEPOINT_TABLE_SIZE - 1)];
-   hlist_for_each_entry(e, node, head, hlist) {
-   if (!strcmp(name, e-name)) {
-   printk(KERN_NOTICE
-   tracepoint %s busy\n, name);
-   return ERR_PTR(-EEXIST);/* Already there */
-   }
+   if (get_tracepoint(name)) {
+   printk(KERN_NOTICE tracepoint %s busy\n, name);
+   return ERR_PTR(-EEXIST);/* Already there */
}
/*
 * Using kmalloc here to allocate a variable length element. Could
@@ -234,7 +227,7 @@ static struct tracepoint_entry *add_tracepoint(const char 
*name)
memcpy(e-name[0], name, name_len);
e-funcs = NULL;
e-refcount = 0;
-   hlist_add_head(e-hlist, head);
+   hash_add(tracepoint_table, e-hlist, hash);
return e;
 }
 
@@ -244,7 +237,7 @@ static struct tracepoint_entry *add_tracepoint(const char 
*name)
  */
 static inline void remove_tracepoint(struct tracepoint_entry *e)
 {
-   hlist_del(e-hlist);
+   hash_del(e-hlist);
kfree(e);
 }
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 07/16] net,9p: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch 9p error table to use the new hashtable implementation. This reduces
the amount of generic unrelated code in 9p.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/9p/error.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/net/9p/error.c b/net/9p/error.c
index 2ab2de7..a394b37 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -34,6 +34,7 @@
 #include linux/jhash.h
 #include linux/errno.h
 #include net/9p/9p.h
+#include linux/hashtable.h
 
 /**
  * struct errormap - map string errors from Plan 9 to Linux numeric ids
@@ -50,8 +51,8 @@ struct errormap {
struct hlist_node list;
 };
 
-#define ERRHASHSZ  32
-static struct hlist_head hash_errmap[ERRHASHSZ];
+#define ERR_HASH_BITS 5
+static DEFINE_HASHTABLE(hash_errmap, ERR_HASH_BITS);
 
 /* FixMe - reduce to a reasonable size */
 static struct errormap errmap[] = {
@@ -193,18 +194,14 @@ static struct errormap errmap[] = {
 int p9_error_init(void)
 {
struct errormap *c;
-   int bucket;
-
-   /* initialize hash table */
-   for (bucket = 0; bucket  ERRHASHSZ; bucket++)
-   INIT_HLIST_HEAD(hash_errmap[bucket]);
+   u32 hash;
 
/* load initial error map into hash table */
for (c = errmap; c-name != NULL; c++) {
c-namelen = strlen(c-name);
-   bucket = jhash(c-name, c-namelen, 0) % ERRHASHSZ;
+   hash = jhash(c-name, c-namelen, 0);
INIT_HLIST_NODE(c-list);
-   hlist_add_head(c-list, hash_errmap[bucket]);
+   hash_add(hash_errmap, c-list, hash);
}
 
return 1;
@@ -223,13 +220,13 @@ int p9_errstr2errno(char *errstr, int len)
int errno;
struct hlist_node *p;
struct errormap *c;
-   int bucket;
+   u32 hash;
 
errno = 0;
p = NULL;
c = NULL;
-   bucket = jhash(errstr, len, 0) % ERRHASHSZ;
-   hlist_for_each_entry(c, p, hash_errmap[bucket], list) {
+   hash = jhash(errstr, len, 0);
+   hash_for_each_possible(hash_errmap, c, p, list, hash) {
if (c-namelen == len  !memcmp(c-name, errstr, len)) {
errno = c-val;
break;
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 08/16] block,elevator: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch elevator to use the new hashtable implementation. This reduces the
amount of generic unrelated code in the elevator.

This also removes the dynamic allocation of the hash table. The size of the
table is constant so there's no point in paying the price of an extra
dereference when accessing it.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 block/blk.h  |  2 +-
 block/elevator.c | 23 ---
 include/linux/elevator.h |  5 -
 3 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/block/blk.h b/block/blk.h
index ca51543..a0abbf6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -61,7 +61,7 @@ static inline void blk_clear_rq_complete(struct request *rq)
 /*
  * Internal elevator interface
  */
-#define ELV_ON_HASH(rq)(!hlist_unhashed((rq)-hash))
+#define ELV_ON_HASH(rq) hash_hashed((rq)-hash)
 
 void blk_insert_flush(struct request *rq);
 void blk_abort_flushes(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index 9b1d42b..898d0eb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -46,11 +46,6 @@ static LIST_HEAD(elv_list);
 /*
  * Merge hash stuff.
  */
-static const int elv_hash_shift = 6;
-#define ELV_HASH_BLOCK(sec)((sec)  3)
-#define ELV_HASH_FN(sec)   \
-   (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
-#define ELV_HASH_ENTRIES   (1  elv_hash_shift)
 #define rq_hash_key(rq)(blk_rq_pos(rq) + blk_rq_sectors(rq))
 
 /*
@@ -142,7 +137,6 @@ static struct elevator_queue *elevator_alloc(struct 
request_queue *q,
  struct elevator_type *e)
 {
struct elevator_queue *eq;
-   int i;
 
eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q-node);
if (unlikely(!eq))
@@ -151,14 +145,7 @@ static struct elevator_queue *elevator_alloc(struct 
request_queue *q,
eq-type = e;
kobject_init(eq-kobj, elv_ktype);
mutex_init(eq-sysfs_lock);
-
-   eq-hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
-   GFP_KERNEL, q-node);
-   if (!eq-hash)
-   goto err;
-
-   for (i = 0; i  ELV_HASH_ENTRIES; i++)
-   INIT_HLIST_HEAD(eq-hash[i]);
+   hash_init(eq-hash);
 
return eq;
 err:
@@ -173,7 +160,6 @@ static void elevator_release(struct kobject *kobj)
 
e = container_of(kobj, struct elevator_queue, kobj);
elevator_put(e-type);
-   kfree(e-hash);
kfree(e);
 }
 
@@ -240,7 +226,7 @@ EXPORT_SYMBOL(elevator_exit);
 
 static inline void __elv_rqhash_del(struct request *rq)
 {
-   hlist_del_init(rq-hash);
+   hash_del(rq-hash);
 }
 
 static void elv_rqhash_del(struct request_queue *q, struct request *rq)
@@ -254,7 +240,7 @@ static void elv_rqhash_add(struct request_queue *q, struct 
request *rq)
struct elevator_queue *e = q-elevator;
 
BUG_ON(ELV_ON_HASH(rq));
-   hlist_add_head(rq-hash, e-hash[ELV_HASH_FN(rq_hash_key(rq))]);
+   hash_add(e-hash, rq-hash, rq_hash_key(rq));
 }
 
 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
@@ -266,11 +252,10 @@ static void elv_rqhash_reposition(struct request_queue 
*q, struct request *rq)
 static struct request *elv_rqhash_find(struct request_queue *q, sector_t 
offset)
 {
struct elevator_queue *e = q-elevator;
-   struct hlist_head *hash_list = e-hash[ELV_HASH_FN(offset)];
struct hlist_node *entry, *next;
struct request *rq;
 
-   hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+   hash_for_each_possible_safe(e-hash, rq, entry, next, hash, offset) {
BUG_ON(!ELV_ON_HASH(rq));
 
if (unlikely(!rq_mergeable(rq))) {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c03af76..7c5a7c9 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -2,6 +2,7 @@
 #define _LINUX_ELEVATOR_H
 
 #include linux/percpu.h
+#include linux/hashtable.h
 
 #ifdef CONFIG_BLOCK
 
@@ -96,6 +97,8 @@ struct elevator_type
struct list_head list;
 };
 
+#define ELV_HASH_BITS 6
+
 /*
  * each queue has an elevator_queue associated with it
  */
@@ -105,8 +108,8 @@ struct elevator_queue
void *elevator_data;
struct kobject kobj;
struct mutex sysfs_lock;
-   struct hlist_head *hash;
unsigned int registered:1;
+   DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
 };
 
 /*
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 09/16] SUNRPC/cache: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch cache to use the new hashtable implementation. This reduces the amount
of generic unrelated code in the cache implementation.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/sunrpc/cache.c | 18 +++---
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index fc2f7aa..eaf9693 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -28,6 +28,7 @@
 #include linux/workqueue.h
 #include linux/mutex.h
 #include linux/pagemap.h
+#include linux/hashtable.h
 #include asm/ioctls.h
 #include linux/sunrpc/types.h
 #include linux/sunrpc/cache.h
@@ -524,19 +525,18 @@ EXPORT_SYMBOL_GPL(cache_purge);
  * it to be revisited when cache info is available
  */
 
-#defineDFR_HASHSIZE(PAGE_SIZE/sizeof(struct list_head))
-#defineDFR_HASH(item)  long)item)4 ^ (((long)item)13)) % 
DFR_HASHSIZE)
+#defineDFR_HASH_BITS   9
 
 #defineDFR_MAX 300 /* ??? */
 
 static DEFINE_SPINLOCK(cache_defer_lock);
 static LIST_HEAD(cache_defer_list);
-static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
+static DEFINE_HASHTABLE(cache_defer_hash, DFR_HASH_BITS);
 static int cache_defer_cnt;
 
 static void __unhash_deferred_req(struct cache_deferred_req *dreq)
 {
-   hlist_del_init(dreq-hash);
+   hash_del(dreq-hash);
if (!list_empty(dreq-recent)) {
list_del_init(dreq-recent);
cache_defer_cnt--;
@@ -545,10 +545,7 @@ static void __unhash_deferred_req(struct 
cache_deferred_req *dreq)
 
 static void __hash_deferred_req(struct cache_deferred_req *dreq, struct 
cache_head *item)
 {
-   int hash = DFR_HASH(item);
-
-   INIT_LIST_HEAD(dreq-recent);
-   hlist_add_head(dreq-hash, cache_defer_hash[hash]);
+   hash_add(cache_defer_hash, dreq-hash, (unsigned long)item);
 }
 
 static void setup_deferral(struct cache_deferred_req *dreq,
@@ -600,7 +597,7 @@ static void cache_wait_req(struct cache_req *req, struct 
cache_head *item)
 * to clean up
 */
spin_lock(cache_defer_lock);
-   if (!hlist_unhashed(sleeper.handle.hash)) {
+   if (hash_hashed(sleeper.handle.hash)) {
__unhash_deferred_req(sleeper.handle);
spin_unlock(cache_defer_lock);
} else {
@@ -671,12 +668,11 @@ static void cache_revisit_request(struct cache_head *item)
struct cache_deferred_req *dreq;
struct list_head pending;
struct hlist_node *lp, *tmp;
-   int hash = DFR_HASH(item);
 
INIT_LIST_HEAD(pending);
spin_lock(cache_defer_lock);
 
-   hlist_for_each_entry_safe(dreq, lp, tmp, cache_defer_hash[hash], hash)
+   hash_for_each_possible_safe(cache_defer_hash, dreq, lp, tmp, hash, 
(unsigned long)item)
if (dreq-item == item) {
__unhash_deferred_req(dreq);
list_add(dreq-recent, pending);
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 10/16] dlm: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch dlm to use the new hashtable implementation. This reduces the amount of
generic unrelated code in the dlm.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 fs/dlm/lowcomms.c | 53 ++---
 1 file changed, 18 insertions(+), 35 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 331ea4f..19acdfe 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -55,6 +55,7 @@
 #include net/sctp/sctp.h
 #include net/sctp/user.h
 #include net/ipv6.h
+#include linux/hashtable.h
 
 #include dlm_internal.h
 #include lowcomms.h
@@ -62,7 +63,7 @@
 #include config.h
 
 #define NEEDED_RMEM (4*1024*1024)
-#define CONN_HASH_SIZE 32
+#define CONN_HASH_BITS 5
 
 /* Number of messages to send before rescheduling */
 #define MAX_SEND_MSG_COUNT 25
@@ -158,34 +159,27 @@ static int dlm_allow_conn;
 static struct workqueue_struct *recv_workqueue;
 static struct workqueue_struct *send_workqueue;
 
-static struct hlist_head connection_hash[CONN_HASH_SIZE];
+/*
+ * On a sidenote, hash function could be very simple because most clusters
+ * have simple sequential nodeids, so we should be able to go straight to
+ * a connection struct in the array. We don't utilize it at the moment,
+ * but it's something worth keeping in mind.
+ */
+static DEFINE_HASHTABLE(connection_hash, CONN_HASH_BITS);
 static DEFINE_MUTEX(connections_lock);
 static struct kmem_cache *con_cache;
 
 static void process_recv_sockets(struct work_struct *work);
 static void process_send_sockets(struct work_struct *work);
 
-
-/* This is deliberately very simple because most clusters have simple
-   sequential nodeids, so we should be able to go straight to a connection
-   struct in the array */
-static inline int nodeid_hash(int nodeid)
-{
-   return nodeid  (CONN_HASH_SIZE-1);
-}
-
 static struct connection *__find_con(int nodeid)
 {
-   int r;
struct hlist_node *h;
struct connection *con;
 
-   r = nodeid_hash(nodeid);
-
-   hlist_for_each_entry(con, h, connection_hash[r], list) {
+   hash_for_each_possible(connection_hash, con, h, list, nodeid)
if (con-nodeid == nodeid)
return con;
-   }
return NULL;
 }
 
@@ -196,7 +190,6 @@ static struct connection *__find_con(int nodeid)
 static struct connection *__nodeid2con(int nodeid, gfp_t alloc)
 {
struct connection *con = NULL;
-   int r;
 
con = __find_con(nodeid);
if (con || !alloc)
@@ -206,8 +199,7 @@ static struct connection *__nodeid2con(int nodeid, gfp_t 
alloc)
if (!con)
return NULL;
 
-   r = nodeid_hash(nodeid);
-   hlist_add_head(con-list, connection_hash[r]);
+   hash_add(connection_hash, con-list, nodeid);
 
con-nodeid = nodeid;
mutex_init(con-sock_mutex);
@@ -235,11 +227,8 @@ static void foreach_conn(void (*conn_func)(struct 
connection *c))
struct hlist_node *h, *n;
struct connection *con;
 
-   for (i = 0; i  CONN_HASH_SIZE; i++) {
-   hlist_for_each_entry_safe(con, h, n, connection_hash[i], list){
-   conn_func(con);
-   }
-   }
+   hash_for_each_safe(connection_hash, i, h, n, con, list)
+   conn_func(con);
 }
 
 static struct connection *nodeid2con(int nodeid, gfp_t allocation)
@@ -262,12 +251,10 @@ static struct connection *assoc2con(int assoc_id)
 
mutex_lock(connections_lock);
 
-   for (i = 0 ; i  CONN_HASH_SIZE; i++) {
-   hlist_for_each_entry(con, h, connection_hash[i], list) {
-   if (con-sctp_assoc == assoc_id) {
-   mutex_unlock(connections_lock);
-   return con;
-   }
+   hash_for_each(connection_hash, i, h, con, list) {
+   if (con-sctp_assoc == assoc_id) {
+   mutex_unlock(connections_lock);
+   return con;
}
}
mutex_unlock(connections_lock);
@@ -1638,7 +1625,7 @@ static void free_conn(struct connection *con)
close_connection(con, true);
if (con-othercon)
kmem_cache_free(con_cache, con-othercon);
-   hlist_del(con-list);
+   hash_del(con-list);
kmem_cache_free(con_cache, con);
 }
 
@@ -1667,10 +1654,6 @@ int dlm_lowcomms_start(void)
 {
int error = -EINVAL;
struct connection *con;
-   int i;
-
-   for (i = 0; i  CONN_HASH_SIZE; i++)
-   INIT_HLIST_HEAD(connection_hash[i]);
 
init_local();
if (!dlm_local_count) {
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 11/16] net,l2tp: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch l2tp to use the new hashtable implementation. This reduces the amount
of generic unrelated code in l2tp.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/l2tp/l2tp_core.c| 140 +++-
 net/l2tp/l2tp_core.h|  15 --
 net/l2tp/l2tp_debugfs.c |  19 +++
 3 files changed, 74 insertions(+), 100 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1a9f372..0b369e4 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -44,6 +44,7 @@
 #include linux/udp.h
 #include linux/l2tp.h
 #include linux/hash.h
+#include linux/hashtable.h
 #include linux/sort.h
 #include linux/file.h
 #include linux/nsproxy.h
@@ -107,8 +108,14 @@ static unsigned int l2tp_net_id;
 struct l2tp_net {
struct list_head l2tp_tunnel_list;
spinlock_t l2tp_tunnel_list_lock;
-   struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
-   spinlock_t l2tp_session_hlist_lock;
+/*
+ * Session hash global list for L2TPv3.
+ * The session_id SHOULD be random according to RFC3931, but several
+ * L2TP implementations use incrementing session_ids.  So we do a real
+ * hash on the session_id, rather than a simple bitmask.
+ */
+   DECLARE_HASHTABLE(l2tp_session_hash, L2TP_HASH_BITS_2);
+   spinlock_t l2tp_session_hash_lock;
 };
 
 static void l2tp_session_set_header_len(struct l2tp_session *session, int 
version);
@@ -156,30 +163,17 @@ do {  
\
 #define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
 #endif
 
-/* Session hash global list for L2TPv3.
- * The session_id SHOULD be random according to RFC3931, but several
- * L2TP implementations use incrementing session_ids.  So we do a real
- * hash on the session_id, rather than a simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
-{
-   return pn-l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
-
-}
-
 /* Lookup a session by id in the global session list
  */
 static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 
session_id)
 {
struct l2tp_net *pn = l2tp_pernet(net);
-   struct hlist_head *session_list =
-   l2tp_session_id_hash_2(pn, session_id);
struct l2tp_session *session;
struct hlist_node *walk;
 
rcu_read_lock_bh();
-   hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) {
+   hash_for_each_possible_rcu(pn-l2tp_session_hash, session, walk,
+   global_hlist, session_id) {
if (session-session_id == session_id) {
rcu_read_unlock_bh();
return session;
@@ -190,23 +184,10 @@ static struct l2tp_session *l2tp_session_find_2(struct 
net *net, u32 session_id)
return NULL;
 }
 
-/* Session hash list.
- * The session_id SHOULD be random according to RFC2661, but several
- * L2TP implementations (Cisco and Microsoft) use incrementing
- * session_ids.  So we do a real hash on the session_id, rather than a
- * simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
-{
-   return tunnel-session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
-}
-
 /* Lookup a session by id
  */
 struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel 
*tunnel, u32 session_id)
 {
-   struct hlist_head *session_list;
struct l2tp_session *session;
struct hlist_node *walk;
 
@@ -217,15 +198,14 @@ struct l2tp_session *l2tp_session_find(struct net *net, 
struct l2tp_tunnel *tunn
if (tunnel == NULL)
return l2tp_session_find_2(net, session_id);
 
-   session_list = l2tp_session_id_hash(tunnel, session_id);
-   read_lock_bh(tunnel-hlist_lock);
-   hlist_for_each_entry(session, walk, session_list, hlist) {
+   read_lock_bh(tunnel-hash_lock);
+   hash_for_each_possible(tunnel-session_hash, session, walk, hlist, 
session_id) {
if (session-session_id == session_id) {
-   read_unlock_bh(tunnel-hlist_lock);
+   read_unlock_bh(tunnel-hash_lock);
return session;
}
}
-   read_unlock_bh(tunnel-hlist_lock);
+   read_unlock_bh(tunnel-hash_lock);
 
return NULL;
 }
@@ -238,17 +218,15 @@ struct l2tp_session *l2tp_session_find_nth(struct 
l2tp_tunnel *tunnel, int nth)
struct l2tp_session *session;
int count = 0;
 
-   read_lock_bh(tunnel-hlist_lock);
-   for (hash = 0; hash  L2TP_HASH_SIZE; hash++) {
-   hlist_for_each_entry(session, walk, 
tunnel-session_hlist[hash], hlist) {
-   if (++count  nth) {
-   read_unlock_bh(tunnel-hlist_lock);
-   return session

[PATCH v8 12/16] dm: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch dm to use the new hashtable implementation. This reduces the amount of
generic unrelated code in the dm.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 drivers/md/dm-snap.c   | 24 +
 drivers/md/persistent-data/dm-block-manager.c  |  1 -
 .../persistent-data/dm-persistent-data-internal.h  | 19 --
 .../md/persistent-data/dm-transaction-manager.c| 30 +++---
 4 files changed, 16 insertions(+), 58 deletions(-)
 delete mode 100644 drivers/md/persistent-data/dm-persistent-data-internal.h

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 223e7eb..4b19fa0 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -34,9 +34,7 @@ static const char dm_snapshot_merge_target_name[] = 
snapshot-merge;
  */
 #define MIN_IOS 256
 
-#define DM_TRACKED_CHUNK_HASH_SIZE 16
-#define DM_TRACKED_CHUNK_HASH(x)   ((unsigned long)(x)  \
-(DM_TRACKED_CHUNK_HASH_SIZE - 1))
+#define DM_TRACKED_CHUNK_HASH_BITS 4
 
 struct dm_exception_table {
uint32_t hash_mask;
@@ -80,7 +78,7 @@ struct dm_snapshot {
/* Chunks with outstanding reads */
spinlock_t tracked_chunk_lock;
mempool_t *tracked_chunk_pool;
-   struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+   DECLARE_HASHTABLE(tracked_chunk_hash, DM_TRACKED_CHUNK_HASH_BITS);
 
/* The on disk metadata handler */
struct dm_exception_store *store;
@@ -202,8 +200,7 @@ static struct dm_snap_tracked_chunk *track_chunk(struct 
dm_snapshot *s,
c-chunk = chunk;
 
spin_lock_irq(s-tracked_chunk_lock);
-   hlist_add_head(c-node,
-  s-tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
+   hash_add(s-tracked_chunk_hash, c-node, chunk);
spin_unlock_irq(s-tracked_chunk_lock);
 
return c;
@@ -215,7 +212,7 @@ static void stop_tracking_chunk(struct dm_snapshot *s,
unsigned long flags;
 
spin_lock_irqsave(s-tracked_chunk_lock, flags);
-   hlist_del(c-node);
+   hash_del(c-node);
spin_unlock_irqrestore(s-tracked_chunk_lock, flags);
 
mempool_free(c, s-tracked_chunk_pool);
@@ -229,8 +226,7 @@ static int __chunk_is_tracked(struct dm_snapshot *s, 
chunk_t chunk)
 
spin_lock_irq(s-tracked_chunk_lock);
 
-   hlist_for_each_entry(c, hn,
-   s-tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
+   hash_for_each_possible(s-tracked_chunk_hash, c, hn, node, chunk) {
if (c-chunk == chunk) {
found = 1;
break;
@@ -1032,7 +1028,6 @@ static void stop_merge(struct dm_snapshot *s)
 static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
struct dm_snapshot *s;
-   int i;
int r = -EINVAL;
char *origin_path, *cow_path;
unsigned args_used, num_flush_requests = 1;
@@ -1127,8 +1122,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned 
int argc, char **argv)
goto bad_tracked_chunk_pool;
}
 
-   for (i = 0; i  DM_TRACKED_CHUNK_HASH_SIZE; i++)
-   INIT_HLIST_HEAD(s-tracked_chunk_hash[i]);
+   hash_init(s-tracked_chunk_hash);
 
spin_lock_init(s-tracked_chunk_lock);
 
@@ -1252,9 +1246,6 @@ static void __handover_exceptions(struct dm_snapshot 
*snap_src,
 
 static void snapshot_dtr(struct dm_target *ti)
 {
-#ifdef CONFIG_DM_DEBUG
-   int i;
-#endif
struct dm_snapshot *s = ti-private;
struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
 
@@ -1285,8 +1276,7 @@ static void snapshot_dtr(struct dm_target *ti)
smp_mb();
 
 #ifdef CONFIG_DM_DEBUG
-   for (i = 0; i  DM_TRACKED_CHUNK_HASH_SIZE; i++)
-   BUG_ON(!hlist_empty(s-tracked_chunk_hash[i]));
+   BUG_ON(!hash_empty(s-tracked_chunk_hash));
 #endif
 
mempool_destroy(s-tracked_chunk_pool);
diff --git a/drivers/md/persistent-data/dm-block-manager.c 
b/drivers/md/persistent-data/dm-block-manager.c
index 5ba2777..31edaf13 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -4,7 +4,6 @@
  * This file is released under the GPL.
  */
 #include dm-block-manager.h
-#include dm-persistent-data-internal.h
 #include ../dm-bufio.h
 
 #include linux/crc32c.h
diff --git a/drivers/md/persistent-data/dm-persistent-data-internal.h 
b/drivers/md/persistent-data/dm-persistent-data-internal.h
deleted file mode 100644
index c49e26f..000
--- a/drivers/md/persistent-data/dm-persistent-data-internal.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) 2011 Red Hat, Inc.
- *
- * This file is released under the GPL.
- */
-
-#ifndef _DM_PERSISTENT_DATA_INTERNAL_H
-#define _DM_PERSISTENT_DATA_INTERNAL_H
-
-#include dm-block-manager.h
-
-static inline unsigned dm_hash_block(dm_block_t b, unsigned hash_mask)
-{
-   const unsigned BIG_PRIME

[PATCH v8 02/16] userns: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch to using the new hashtable implementation to store user structs.
This reduces the amount of generic unrelated code in kernel/user.c.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 kernel/user.c | 33 -
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/kernel/user.c b/kernel/user.c
index 750acff..f010389 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -16,6 +16,7 @@
 #include linux/interrupt.h
 #include linux/export.h
 #include linux/user_namespace.h
+#include linux/hashtable.h
 
 /*
  * userns count is 1 for root user, 1 for init_uts_ns,
@@ -60,13 +61,9 @@ EXPORT_SYMBOL_GPL(init_user_ns);
  */
 
 #define UIDHASH_BITS   (CONFIG_BASE_SMALL ? 3 : 7)
-#define UIDHASH_SZ (1  UIDHASH_BITS)
-#define UIDHASH_MASK   (UIDHASH_SZ - 1)
-#define __uidhashfn(uid)   (((uid  UIDHASH_BITS) + uid)  UIDHASH_MASK)
-#define uidhashentry(uid)  (uidhash_table + __uidhashfn((__kuid_val(uid
 
 static struct kmem_cache *uid_cachep;
-struct hlist_head uidhash_table[UIDHASH_SZ];
+static DEFINE_HASHTABLE(uidhash_table, UIDHASH_BITS);
 
 /*
  * The uidhash_lock is mostly taken from process context, but it is
@@ -92,22 +89,22 @@ struct user_struct root_user = {
 /*
  * These routines must be called with the uidhash spinlock held!
  */
-static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
+static void uid_hash_insert(struct user_struct *up)
 {
-   hlist_add_head(up-uidhash_node, hashent);
+   hash_add(uidhash_table, up-uidhash_node, __kuid_val(up-uid));
 }
 
 static void uid_hash_remove(struct user_struct *up)
 {
-   hlist_del_init(up-uidhash_node);
+   hash_del(up-uidhash_node);
 }
 
-static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head 
*hashent)
+static struct user_struct *uid_hash_find(kuid_t uid)
 {
struct user_struct *user;
struct hlist_node *h;
 
-   hlist_for_each_entry(user, h, hashent, uidhash_node) {
+   hash_for_each_possible(uidhash_table, user, h, uidhash_node, 
__kuid_val(uid)) {
if (uid_eq(user-uid, uid)) {
atomic_inc(user-__count);
return user;
@@ -143,7 +140,7 @@ struct user_struct *find_user(kuid_t uid)
unsigned long flags;
 
spin_lock_irqsave(uidhash_lock, flags);
-   ret = uid_hash_find(uid, uidhashentry(uid));
+   ret = uid_hash_find(uid);
spin_unlock_irqrestore(uidhash_lock, flags);
return ret;
 }
@@ -164,11 +161,10 @@ void free_uid(struct user_struct *up)
 
 struct user_struct *alloc_uid(kuid_t uid)
 {
-   struct hlist_head *hashent = uidhashentry(uid);
struct user_struct *up, *new;
 
spin_lock_irq(uidhash_lock);
-   up = uid_hash_find(uid, hashent);
+   up = uid_hash_find(uid);
spin_unlock_irq(uidhash_lock);
 
if (!up) {
@@ -184,13 +180,13 @@ struct user_struct *alloc_uid(kuid_t uid)
 * on adding the same user already..
 */
spin_lock_irq(uidhash_lock);
-   up = uid_hash_find(uid, hashent);
+   up = uid_hash_find(uid);
if (up) {
key_put(new-uid_keyring);
key_put(new-session_keyring);
kmem_cache_free(uid_cachep, new);
} else {
-   uid_hash_insert(new, hashent);
+   uid_hash_insert(new);
up = new;
}
spin_unlock_irq(uidhash_lock);
@@ -204,17 +200,12 @@ out_unlock:
 
 static int __init uid_cache_init(void)
 {
-   int n;
-
uid_cachep = kmem_cache_create(uid_cache, sizeof(struct user_struct),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 
-   for(n = 0; n  UIDHASH_SZ; ++n)
-   INIT_HLIST_HEAD(uidhash_table + n);
-
/* Insert the root user immediately (init already runs as root) */
spin_lock_irq(uidhash_lock);
-   uid_hash_insert(root_user, uidhashentry(GLOBAL_ROOT_UID));
+   uid_hash_insert(root_user);
spin_unlock_irq(uidhash_lock);
 
return 0;
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 14/16] net,rds: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch rds to use the new hashtable implementation. This reduces the amount of
generic unrelated code in rds.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/rds/bind.c   |  20 +--
 net/rds/connection.c | 100 ++-
 2 files changed, 53 insertions(+), 67 deletions(-)

diff --git a/net/rds/bind.c b/net/rds/bind.c
index 637bde5..a99e524 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -36,16 +36,16 @@
 #include linux/if_arp.h
 #include linux/jhash.h
 #include linux/ratelimit.h
+#include linux/hashtable.h
 #include rds.h
 
-#define BIND_HASH_SIZE 1024
-static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
+#define BIND_HASH_BITS 10
+static DEFINE_HASHTABLE(bind_hash_table, BIND_HASH_BITS);
 static DEFINE_SPINLOCK(rds_bind_lock);
 
-static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
+static u32 rds_hash(__be32 addr, __be16 port)
 {
-   return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) 
- (BIND_HASH_SIZE - 1));
+   return jhash_2words((u32)addr, (u32)port, 0);
 }
 
 static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
@@ -53,12 +53,12 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 
port,
 {
struct rds_sock *rs;
struct hlist_node *node;
-   struct hlist_head *head = hash_to_bucket(addr, port);
+   u32 key = rds_hash(addr, port);
u64 cmp;
u64 needle = ((u64)be32_to_cpu(addr)  32) | be16_to_cpu(port);
 
rcu_read_lock();
-   hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
+   hash_for_each_possible_rcu(bind_hash_table, rs, node, rs_bound_node, 
key) {
cmp = ((u64)be32_to_cpu(rs-rs_bound_addr)  32) |
  be16_to_cpu(rs-rs_bound_port);
 
@@ -74,13 +74,13 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 
port,
 * make sure our addr and port are set before
 * we are added to the list, other people
 * in rcu will find us as soon as the
-* hlist_add_head_rcu is done
+* hash_add_rcu is done
 */
insert-rs_bound_addr = addr;
insert-rs_bound_port = port;
rds_sock_addref(insert);
 
-   hlist_add_head_rcu(insert-rs_bound_node, head);
+   hash_add_rcu(bind_hash_table, insert-rs_bound_node, key);
}
return NULL;
 }
@@ -152,7 +152,7 @@ void rds_remove_bound(struct rds_sock *rs)
  rs, rs-rs_bound_addr,
  ntohs(rs-rs_bound_port));
 
-   hlist_del_init_rcu(rs-rs_bound_node);
+   hash_del_rcu(rs-rs_bound_node);
rds_sock_put(rs);
rs-rs_bound_addr = 0;
}
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 9e07c75..a9afcb8 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -34,28 +34,24 @@
 #include linux/list.h
 #include linux/slab.h
 #include linux/export.h
+#include linux/hashtable.h
 #include net/inet_hashtables.h
 
 #include rds.h
 #include loop.h
 
 #define RDS_CONNECTION_HASH_BITS 12
-#define RDS_CONNECTION_HASH_ENTRIES (1  RDS_CONNECTION_HASH_BITS)
-#define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1)
 
 /* converting this to RCU is a chore for another day.. */
 static DEFINE_SPINLOCK(rds_conn_lock);
 static unsigned long rds_conn_count;
-static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES];
+static DEFINE_HASHTABLE(rds_conn_hash, RDS_CONNECTION_HASH_BITS);
 static struct kmem_cache *rds_conn_slab;
 
-static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
+static unsigned long rds_conn_hashfn(__be32 laddr, __be32 faddr)
 {
/* Pass NULL, don't need struct net for hash */
-   unsigned long hash = inet_ehashfn(NULL,
- be32_to_cpu(laddr), 0,
- be32_to_cpu(faddr), 0);
-   return rds_conn_hash[hash  RDS_CONNECTION_HASH_MASK];
+   return inet_ehashfn(NULL,  be32_to_cpu(laddr), 0,  be32_to_cpu(faddr), 
0);
 }
 
 #define rds_conn_info_set(var, test, suffix) do {  \
@@ -64,14 +60,14 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, 
__be32 faddr)
 } while (0)
 
 /* rcu read lock must be held or the connection spinlock */
-static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
- __be32 laddr, __be32 faddr,
+static struct rds_connection *rds_conn_lookup(__be32 laddr, __be32 faddr,
  struct rds_transport *trans)
 {
struct rds_connection *conn, *ret = NULL;
struct hlist_node *pos;
+   unsigned long key = rds_conn_hashfn(laddr, faddr);
 
-   hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
+   hash_for_each_possible_rcu(rds_conn_hash, conn, pos, c_hash_node, key

[PATCH v8 16/16] tracing output: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch tracing to use the new hashtable implementation. This reduces the
amount of generic unrelated code in the tracing module.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 kernel/trace/trace_output.c | 18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 123b189..6af4879 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -8,15 +8,15 @@
 #include linux/module.h
 #include linux/mutex.h
 #include linux/ftrace.h
+#include linux/hashtable.h
 
 #include trace_output.h
 
-/* must be a power of 2 */
-#define EVENT_HASHSIZE 128
+#define EVENT_HASH_BITS7
 
 DECLARE_RWSEM(trace_event_mutex);
 
-static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
+static DEFINE_HASHTABLE(event_hash, EVENT_HASH_BITS);
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
@@ -712,11 +712,8 @@ struct trace_event *ftrace_find_event(int type)
 {
struct trace_event *event;
struct hlist_node *n;
-   unsigned key;
 
-   key = type  (EVENT_HASHSIZE - 1);
-
-   hlist_for_each_entry(event, n, event_hash[key], node) {
+   hash_for_each_possible(event_hash, event, n, node, type) {
if (event-type == type)
return event;
}
@@ -781,7 +778,6 @@ void trace_event_read_unlock(void)
  */
 int register_ftrace_event(struct trace_event *event)
 {
-   unsigned key;
int ret = 0;
 
down_write(trace_event_mutex);
@@ -833,9 +829,7 @@ int register_ftrace_event(struct trace_event *event)
if (event-funcs-binary == NULL)
event-funcs-binary = trace_nop_print;
 
-   key = event-type  (EVENT_HASHSIZE - 1);
-
-   hlist_add_head(event-node, event_hash[key]);
+   hash_add(event_hash, event-node, event-type);
 
ret = event-type;
  out:
@@ -850,7 +844,7 @@ EXPORT_SYMBOL_GPL(register_ftrace_event);
  */
 int __unregister_ftrace_event(struct trace_event *event)
 {
-   hlist_del(event-node);
+   hash_del(event-node);
list_del(event-list);
return 0;
 }
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 15/16] openvswitch: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch openvswitch to use the new hashtable implementation. This reduces the
amount of generic unrelated code in openvswitch.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/openvswitch/vport.c | 35 ---
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 03779e8..20fdbd4 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -28,6 +28,7 @@
 #include linux/rtnetlink.h
 #include linux/compat.h
 #include net/net_namespace.h
+#include linux/hashtable.h
 
 #include datapath.h
 #include vport.h
@@ -41,8 +42,8 @@ static const struct vport_ops *vport_ops_list[] = {
 };
 
 /* Protected by RCU read lock for reading, RTNL lock for writing. */
-static struct hlist_head *dev_table;
-#define VPORT_HASH_BUCKETS 1024
+#define VPORT_HASH_BITS 10
+static DEFINE_HASHTABLE(dev_table, VPORT_HASH_BITS);
 
 /**
  * ovs_vport_init - initialize vport subsystem
@@ -51,11 +52,6 @@ static struct hlist_head *dev_table;
  */
 int ovs_vport_init(void)
 {
-   dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
-   GFP_KERNEL);
-   if (!dev_table)
-   return -ENOMEM;
-
return 0;
 }
 
@@ -66,13 +62,6 @@ int ovs_vport_init(void)
  */
 void ovs_vport_exit(void)
 {
-   kfree(dev_table);
-}
-
-static struct hlist_head *hash_bucket(struct net *net, const char *name)
-{
-   unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
-   return dev_table[hash  (VPORT_HASH_BUCKETS - 1)];
 }
 
 /**
@@ -84,13 +73,12 @@ static struct hlist_head *hash_bucket(struct net *net, 
const char *name)
  */
 struct vport *ovs_vport_locate(struct net *net, const char *name)
 {
-   struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
struct hlist_node *node;
+   int key = full_name_hash(name, strlen(name));
 
-   hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
-   if (!strcmp(name, vport-ops-get_name(vport)) 
-   net_eq(ovs_dp_get_net(vport-dp), net))
+   hash_for_each_possible_rcu(dev_table, vport, node, hash_node, key)
+   if (!strcmp(name, vport-ops-get_name(vport)))
return vport;
 
return NULL;
@@ -174,7 +162,8 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
 
for (i = 0; i  ARRAY_SIZE(vport_ops_list); i++) {
if (vport_ops_list[i]-type == parms-type) {
-   struct hlist_head *bucket;
+   int key;
+   const char *name;
 
vport = vport_ops_list[i]-create(parms);
if (IS_ERR(vport)) {
@@ -182,9 +171,9 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
goto out;
}
 
-   bucket = hash_bucket(ovs_dp_get_net(vport-dp),
-vport-ops-get_name(vport));
-   hlist_add_head_rcu(vport-hash_node, bucket);
+   name = vport-ops-get_name(vport);
+   key = full_name_hash(name, strlen(name));
+   hash_add_rcu(dev_table, vport-hash_node, key);
return vport;
}
}
@@ -225,7 +214,7 @@ void ovs_vport_del(struct vport *vport)
 {
ASSERT_RTNL();
 
-   hlist_del_rcu(vport-hash_node);
+   hash_del_rcu(vport-hash_node);
 
vport-ops-destroy(vport);
 }
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 13/16] lockd: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch lockd to use the new hashtable implementation. This reduces the amount
of generic unrelated code in lockd.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 fs/lockd/svcsubs.c | 58 ++
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0deb5f6..26c90c8 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -20,6 +20,7 @@
 #include linux/lockd/share.h
 #include linux/module.h
 #include linux/mount.h
+#include linux/hashtable.h
 
 #define NLMDBG_FACILITYNLMDBG_SVCSUBS
 
@@ -28,8 +29,7 @@
  * Global file hash table
  */
 #define FILE_HASH_BITS 7
-#define FILE_NRHASH(1FILE_HASH_BITS)
-static struct hlist_head   nlm_files[FILE_NRHASH];
+static DEFINE_HASHTABLE(nlm_files, FILE_HASH_BITS);
 static DEFINE_MUTEX(nlm_file_mutex);
 
 #ifdef NFSD_DEBUG
@@ -68,7 +68,7 @@ static inline unsigned int file_hash(struct nfs_fh *f)
int i;
for (i=0; iNFS2_FHSIZE;i++)
tmp += f-data[i];
-   return tmp  (FILE_NRHASH - 1);
+   return tmp;
 }
 
 /*
@@ -86,17 +86,17 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file 
**result,
 {
struct hlist_node *pos;
struct nlm_file *file;
-   unsigned inthash;
+   unsigned intkey;
__be32  nfserr;
 
nlm_debug_print_fh(nlm_lookup_file, f);
 
-   hash = file_hash(f);
+   key = file_hash(f);
 
/* Lock file table */
mutex_lock(nlm_file_mutex);
 
-   hlist_for_each_entry(file, pos, nlm_files[hash], f_list)
+   hash_for_each_possible(nlm_files, file, pos, f_list, file_hash(f))
if (!nfs_compare_fh(file-f_handle, f))
goto found;
 
@@ -123,7 +123,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file 
**result,
goto out_free;
}
 
-   hlist_add_head(file-f_list, nlm_files[hash]);
+   hash_add(nlm_files, file-f_list, key);
 
 found:
dprintk(lockd: found file %p (count %d)\n, file, file-f_count);
@@ -147,8 +147,8 @@ static inline void
 nlm_delete_file(struct nlm_file *file)
 {
nlm_debug_print_file(closing file, file);
-   if (!hlist_unhashed(file-f_list)) {
-   hlist_del(file-f_list);
+   if (hash_hashed(file-f_list)) {
+   hash_del(file-f_list);
nlmsvc_ops-fclose(file-f_file);
kfree(file);
} else {
@@ -253,27 +253,25 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match,
int i, ret = 0;
 
mutex_lock(nlm_file_mutex);
-   for (i = 0; i  FILE_NRHASH; i++) {
-   hlist_for_each_entry_safe(file, pos, next, nlm_files[i], 
f_list) {
-   if (is_failover_file  !is_failover_file(data, file))
-   continue;
-   file-f_count++;
-   mutex_unlock(nlm_file_mutex);
-
-   /* Traverse locks, blocks and shares of this file
-* and update file-f_locks count */
-   if (nlm_inspect_file(data, file, match))
-   ret = 1;
-
-   mutex_lock(nlm_file_mutex);
-   file-f_count--;
-   /* No more references to this file. Let go of it. */
-   if (list_empty(file-f_blocks)  !file-f_locks
- !file-f_shares  !file-f_count) {
-   hlist_del(file-f_list);
-   nlmsvc_ops-fclose(file-f_file);
-   kfree(file);
-   }
+   hash_for_each_safe(nlm_files, i, pos, next, file, f_list) {
+   if (is_failover_file  !is_failover_file(data, file))
+   continue;
+   file-f_count++;
+   mutex_unlock(nlm_file_mutex);
+
+   /* Traverse locks, blocks and shares of this file
+* and update file-f_locks count */
+   if (nlm_inspect_file(data, file, match))
+   ret = 1;
+
+   mutex_lock(nlm_file_mutex);
+   file-f_count--;
+   /* No more references to this file. Let go of it. */
+   if (list_empty(file-f_blocks)  !file-f_locks
+ !file-f_shares  !file-f_count) {
+   hash_del(file-f_list);
+   nlmsvc_ops-fclose(file-f_file);
+   kfree(file);
}
}
mutex_unlock(nlm_file_mutex);
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v8 05/16] mm/huge_memory: use new hashtable implementation

2012-10-30 Thread Sasha Levin

Switch hugemem to use the new hashtable implementation. This reduces the
amount of generic unrelated code in the hugemem.

This also removes the dynamic allocation of the hash table. The size of the
table is constant, so there's no point in paying the price of an extra
dereference when accessing it.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 mm/huge_memory.c | 53 -
 1 file changed, 12 insertions(+), 41 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3c14a96..4b32238 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -19,6 +19,7 @@
 #include linux/mman.h
 #include linux/pagemap.h
 #include linux/migrate.h
+#include linux/hashtable.h
 #include asm/tlb.h
 #include asm/pgalloc.h
 #include internal.h
@@ -59,12 +60,12 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
 static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
 
 static int khugepaged(void *none);
-static int mm_slots_hash_init(void);
 static int khugepaged_slab_init(void);
 static void khugepaged_slab_free(void);
 
-#define MM_SLOTS_HASH_HEADS 1024
-static struct hlist_head *mm_slots_hash __read_mostly;
+#define MM_SLOTS_HASH_BITS 10
+static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
+
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
 /**
@@ -545,12 +546,6 @@ static int __init hugepage_init(void)
if (err)
goto out;
 
-   err = mm_slots_hash_init();
-   if (err) {
-   khugepaged_slab_free();
-   goto out;
-   }
-
/*
 * By default disable transparent hugepages on smaller systems,
 * where the extra memory used could hurt more than TLB overhead
@@ -1694,47 +1689,23 @@ static inline void free_mm_slot(struct mm_slot *mm_slot)
kmem_cache_free(mm_slot_cache, mm_slot);
 }
 
-static int __init mm_slots_hash_init(void)
-{
-   mm_slots_hash = kzalloc(MM_SLOTS_HASH_HEADS * sizeof(struct hlist_head),
-   GFP_KERNEL);
-   if (!mm_slots_hash)
-   return -ENOMEM;
-   return 0;
-}
-
-#if 0
-static void __init mm_slots_hash_free(void)
-{
-   kfree(mm_slots_hash);
-   mm_slots_hash = NULL;
-}
-#endif
-
 static struct mm_slot *get_mm_slot(struct mm_struct *mm)
 {
-   struct mm_slot *mm_slot;
-   struct hlist_head *bucket;
+   struct mm_slot *slot;
struct hlist_node *node;
 
-   bucket = mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
-   % MM_SLOTS_HASH_HEADS];
-   hlist_for_each_entry(mm_slot, node, bucket, hash) {
-   if (mm == mm_slot-mm)
-   return mm_slot;
-   }
+   hash_for_each_possible(mm_slots_hash, slot, node, hash, (unsigned long) 
mm)
+   if (slot-mm == mm)
+   return slot;
+
return NULL;
 }
 
 static void insert_to_mm_slots_hash(struct mm_struct *mm,
struct mm_slot *mm_slot)
 {
-   struct hlist_head *bucket;
-
-   bucket = mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
-   % MM_SLOTS_HASH_HEADS];
mm_slot-mm = mm;
-   hlist_add_head(mm_slot-hash, bucket);
+   hash_add(mm_slots_hash, mm_slot-hash, (long)mm);
 }
 
 static inline int khugepaged_test_exit(struct mm_struct *mm)
@@ -1803,7 +1774,7 @@ void __khugepaged_exit(struct mm_struct *mm)
spin_lock(khugepaged_mm_lock);
mm_slot = get_mm_slot(mm);
if (mm_slot  khugepaged_scan.mm_slot != mm_slot) {
-   hlist_del(mm_slot-hash);
+   hash_del(mm_slot-hash);
list_del(mm_slot-mm_node);
free = 1;
}
@@ -2252,7 +2223,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 
if (khugepaged_test_exit(mm)) {
/* free mm_slot */
-   hlist_del(mm_slot-hash);
+   hash_del(mm_slot-hash);
list_del(mm_slot-mm_node);
 
/*
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 01/16] hashtable: introduce a small and naive hashtable

2012-10-30 Thread Sasha Levin

On Tue, Oct 30, 2012 at 5:42 PM, Tejun Heo t...@kernel.org wrote:
 Hello,

 Just some nitpicks.

 On Tue, Oct 30, 2012 at 02:45:57PM -0400, Sasha Levin wrote:
 +/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit 
 kernels. */
 +#define hash_min(val, bits) 
  \
 +({  
  \
 + sizeof(val) <= 4 ? 
  \
 + hash_32(val, bits) :   
  \
 + hash_long(val, bits);  
  \
 +})

 Doesn't the above fit in 80 column.  Why is it broken into multiple
 lines?  Also, you probably want () around at least @val.  In general,
 it's a good idea to add () around any macro argument to avoid nasty
 surprises.

It was broken to multiple lines because it looks nicer that way (IMO).

If we wrap it with () it's going to go over 80, so it's going to stay
broken down either way :)


Thanks,
Sasha

 Looks good to me otherwise.

  Reviewed-by: Tejun Heo t...@kernel.org

 Thanks.

 --
 tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 01/16] hashtable: introduce a small and naive hashtable

2012-10-30 Thread Sasha Levin

On Tue, Oct 30, 2012 at 8:51 PM, Jim Rees r...@umich.edu wrote:
 Sasha Levin wrote:

   On Tue, Oct 30, 2012 at 5:42 PM, Tejun Heo t...@kernel.org wrote:
Hello,
   
Just some nitpicks.
   
On Tue, Oct 30, 2012 at 02:45:57PM -0400, Sasha Levin wrote:
+/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit 
 kernels. */
+#define hash_min(val, bits) 
  \
+({  
  \
+ sizeof(val) <= 4 ? 
  \
+ hash_32(val, bits) :   
  \
+ hash_long(val, bits);  
  \
+})
   
Doesn't the above fit in 80 column.  Why is it broken into multiple
lines?  Also, you probably want () around at least @val.  In general,
it's a good idea to add () around any macro argument to avoid nasty
surprises.

   It was broken to multiple lines because it looks nicer that way (IMO).

   If we wrap it with () it's going to go over 80, so it's going to stay
   broken down either way :)

 I would prefer the body be all on one line too. But shouldn't this be a
 static inline function?

We want sizeof(val), which wouldn't work in a static inline. We can
either wrap a static inline __hash_min() with a macro and pass that
size to it, but that's quite an overkill here, or we can add a size
parameter to hash_min(), but it would look awkward considering what
hash_32()/hash_64()/hash_long() look like.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 01/16] hashtable: introduce a small and naive hashtable

2012-10-30 Thread Sasha Levin

Hi Linus,

 But whatever. This series has gotten way too much bike-shedding
 anyway. I think it should just be applied, since it does remove lines
 of code overall. I'd even possibly apply it to mainline, but it seems
 to be against linux-next.

Yup, I switched to using -next because I've been running my
trinity/KVM tools tests with it.

I can either rebase that on top of mainline, or we can ask maintainers
to take it to their own trees if you take only 01/16 into mainline.
What would you prefer?


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v11] kvm: notify host when the guest is panicked

2012-10-30 Thread Sasha Levin

On Tue, Oct 30, 2012 at 9:48 PM, Wen Congyang we...@cn.fujitsu.com wrote:
 At 10/31/2012 09:12 AM, Marcelo Tosatti Wrote:
 It has been asked earlier why a simple virtio device is not usable
 for this (with no response IIRC).

 1. We can't use virtio device when the kernel is booting.

So the issue here is the small window between the point the guest
becomes self-aware and the point virtio drivers are loaded,
right?

I agree that if something happens during that interval, a
virtio-notifier driver won't catch that, but anything beyond that is
better done with a virtio driver, so how is the generic infrastructure
added in this patch useful to anything beyond detecting panics in that
initial interval?

 2. The virtio's driver can be built as a module, and if it is not loaded
and the kernel is panicked, there is no way to notify the host.

Even if the suggested virtio-notifier driver is built as a module, it
would get auto-loaded when the guest is booting, so I'm not sure about
this point?

 3. I/O port is more reliable than virtio device.
If virtio's driver has some bug, and it cause kernel panicked, we can't
use it. The I/O port is more reliable because it only depends on notifier
chain(If we use virtio device, it also depends on notifier chain).

This is like suggesting that we let KVM emulate virtio-blk on its
own, parallel to the virtio implementation, so that even if there's a
problem with virtio-blk, KVM can emulate a virtio-blk on its own.

Furthermore, why stop at virtio? What if the KVM code has a bug and it
doesn't pass IO properly? Or the x86 code? we still want panic
notifications if that happens...


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-10-31 Thread Sasha Levin

On Wed, Oct 31, 2012 at 8:53 AM, Jiri Slaby jsl...@suse.cz wrote:
 On 10/25/2012 08:02 PM, Sasha Levin wrote:
 Fuzzing with trinity inside a KVM tools (lkvm) guest with -next kernel
 uncovered the following warning:

 I cannot reproduce that :(. Do you still see it?

Yes, it reproduces pretty easily while fuzzing.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch for-3.7] mm, mempolicy: fix printing stack contents in numa_maps

2012-10-31 Thread Sasha Levin

On Fri, Oct 26, 2012 at 4:48 AM, Peter Zijlstra pet...@infradead.org wrote:
 On Thu, 2012-10-25 at 16:09 -0700, Linus Torvalds wrote:
 On Thu, Oct 25, 2012 at 7:39 AM, Peter Zijlstra pet...@infradead.org wrote:
 
  So I think the below should work, we hold the spinlock over both rb-tree
  modification as sp free, this makes mpol_shared_policy_lookup() which
  returns the policy with an incremented refcount work with just the
  spinlock.
 
  Comments?

 Looks reasonable, if annoyingly complex for something that shouldn't
 be important enough for this. Oh well.

 I agree with that.. Its just that when doing numa placement one needs to
 respect the pre-existing placement constraints. I've not seen a way
 around this.

 However, please check me on this: the need for this is only for
 linux-next right now, correct? All the current users in my tree are ok
 with just the mutex, no?

 Yes, the need comes from the numa stuff and I'll stick this patch in
 there.

 I completely missed Mel's patch turning it into a mutex, but I guess
 that's what -next is for :-).

So I've been fuzzing with it for the past couple of days and it's been
looking fine with it. Can someone grab it into his tree please?


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-10-31 Thread Sasha Levin

On 10/31/2012 11:32 AM, Jiri Slaby wrote:
 On 10/31/2012 04:30 PM, Sasha Levin wrote:
 On Wed, Oct 31, 2012 at 8:53 AM, Jiri Slaby jsl...@suse.cz wrote:
 On 10/25/2012 08:02 PM, Sasha Levin wrote:
 Fuzzing with trinity inside a KVM tools (lkvm) guest with -next kernel
 uncovered the following warning:

 I cannot reproduce that :(. Do you still see it?

 Yes, it reproduces pretty easily while fuzzing.
 
 What is your exact setup? I tried trinity with 100 000 syscalls inside
 KVM with an LDEP-enabled kernel. How many serial ports do you have in
 the guest? Any USB serials in there?

btw, I'm also seeing the following lockups, don't know if it's related:


[ 2283.070569] INFO: task trinity-child20:9161 blocked for more than 120 
seconds.
[ 2283.071775] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[ 2283.074673] trinity-child20 D 8800276cb000  5424  9161   6364 0x
[ 2283.076018]  880059d9da58 0002 0002 

[ 2283.077393]  880059d7b000 880059d9dfd8 880059d9dfd8 
880059d9dfd8
[ 2283.078763]  8800276cb000 880059d7b000 880059d9da78 
88001a095180
[ 2283.084144] Call Trace:
[ 2283.085039]  [83a98bd5] schedule+0x55/0x60
[ 2283.086748]  [83a98bf3] schedule_preempt_disabled+0x13/0x20
[ 2283.089000]  [83a9735d] __mutex_lock_common+0x36d/0x5a0
[ 2283.090658]  [83a9afb3] ? tty_lock_nested+0x73/0x80
[ 2283.091691]  [83a9afb3] ? tty_lock_nested+0x73/0x80
[ 2283.092779]  [83a975cf] mutex_lock_nested+0x3f/0x50
[ 2283.093875]  [83a9afb3] tty_lock_nested+0x73/0x80
[ 2283.094872]  [83a9afcb] tty_lock+0xb/0x10
[ 2283.095443]  [81bae880] tty_open+0x270/0x5f0
[ 2283.096181]  [8127cda8] chrdev_open+0xf8/0x1d0
[ 2283.097054]  [8127693c] do_dentry_open+0x1fc/0x310
[ 2283.098015]  [8127ccb0] ? cdev_put+0x20/0x20
[ 2283.098943]  [812a] finish_open+0x4a/0x60
[ 2283.099935]  [81286947] do_last+0xb87/0xe70
[ 2283.100910]  [812844b0] ? link_path_walk+0x70/0x900
[ 2283.101553]  [81286cf2] path_openat+0xc2/0x500
[ 2283.102282]  [83a9a314] ? _raw_spin_unlock_irqrestore+0x84/0xb0
[ 2283.103506]  [8128716c] do_filp_open+0x3c/0xa0
[ 2283.104282]  [81296c11] ? __alloc_fd+0x1e1/0x200
[ 2283.105278]  [81277c0c] do_sys_open+0x11c/0x1c0
[ 2283.106519]  [81277ccc] sys_open+0x1c/0x20
[ 2283.107241]  [81277d01] sys_creat+0x11/0x20
[ 2283.107975]  [83a9be18] tracesys+0xe1/0xe6
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[RFC] hlist: drop the node parameter from iterators

2012-11-01 Thread Sasha Levin

/af_iucv.c  |   21 
+++--
 net/key/af_key.c|3 +--
 net/l2tp/l2tp_core.c|   12 
 net/l2tp/l2tp_ip.c  |3 +--
 net/l2tp/l2tp_ip6.c |3 +--
 net/llc/llc_sap.c   |3 +--
 net/mac80211/mesh_pathtbl.c |   20 +++-
 net/netfilter/ipvs/ip_vs_conn.c |   18 +++---
 net/netfilter/nf_conntrack_expect.c |9 +++--
 net/netfilter/nf_conntrack_helper.c |9 +++--
 net/netfilter/nf_conntrack_netlink.c|3 +--
 net/netfilter/nf_nat_core.c |3 +--
 net/netfilter/nfnetlink_cthelper.c  |9 +++--
 net/netfilter/nfnetlink_log.c   |3 +--
 net/netfilter/nfnetlink_queue_core.c|5 ++---
 net/netfilter/xt_RATEEST.c  |3 +--
 net/netfilter/xt_hashlimit.c|9 +++--
 net/netlink/af_netlink.c|   25 
+
 net/netrom/af_netrom.c  |   12 
 net/openvswitch/datapath.c  |5 ++---
 net/openvswitch/flow.c  |8 +++-
 net/openvswitch/vport.c |3 +--
 net/packet/af_packet.c  |3 +--
 net/packet/diag.c   |3 +--
 net/phonet/pep.c|3 +--
 net/phonet/socket.c |8 +++-
 net/rds/bind.c  |3 +--
 net/rds/connection.c|9 +++--
 net/rose/af_rose.c  |   14 +-
 net/sched/sch_cbq.c |   13 +
 net/sched/sch_drr.c |6 ++
 net/sched/sch_hfsc.c|   11 ---
 net/sched/sch_htb.c |8 +++-
 net/sched/sch_qfq.c |7 +++
 net/sctp/endpointola.c  |3 +--
 net/sctp/input.c|6 ++
 net/sctp/proc.c |9 +++--
 net/sctp/socket.c   |7 +++
 net/sunrpc/auth.c   |5 ++---
 net/sunrpc/svcauth.c|3 +--
 net/tipc/name_table.c   |8 +++-
 net/tipc/node.c |3 +--
 net/unix/af_unix.c  |6 ++
 net/unix/diag.c |4 ++--
 net/x25/af_x25.c|   12 
 net/xfrm/xfrm_state.c   |   31 
---
 security/integrity/ima/ima_queue.c  |3 +--
 security/selinux/avc.c  |   17 ++---
 tools/perf/util/evlist.c|3 +--
 virt/kvm/eventfd.c  |3 +--
 virt/kvm/irq_comm.c |   12 
 170 files changed, 481 insertions(+), 879 deletions(-)

Yes, beyond making hlist prettier, we also drop 400 lines. win-win?

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 include/linux/list.h| 46 +---
 include/linux/rculist.h | 56 -
 2 files changed, 47 insertions(+), 55 deletions(-)

diff --git a/include/linux/list.h b/include/linux/list.h
index cc6d2aa..223e1dd 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -668,52 +668,44 @@ static inline void hlist_move_list(struct hlist_head *old,
 
 /**
  * hlist_for_each_entry- iterate over list of given type
- * @tpos:  the type * to use as a loop cursor.
- * @pos:   the struct hlist_node to use as a loop cursor.
+ * @pos:   the type * to use as a loop cursor.
  * @head:  the head for your list.
  * @member:the name of the hlist_node within the struct.
  */
-#define hlist_for_each_entry(tpos, pos, head, member)   \
-   for (pos = (head)->first;\
-pos &&  \
-   ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
-pos = pos->next)
+#define hlist_for_each_entry(pos, head, member)
\
+   for (pos = hlist_entry((head)-first, typeof(*(pos)), member);  
\
+((pos)-member

Re: [RFC] hlist: drop the node parameter from iterators

2012-11-01 Thread Sasha Levin

On 11/01/2012 08:59 PM, Linus Torvalds wrote:
 On Thu, Nov 1, 2012 at 4:06 PM, Sasha Levin sasha.le...@oracle.com wrote:
 I'm not sure why, but the hlist for each entry iterators were conceived
 differently from the list ones. While the list ones are nice and elegant:

 list_for_each_entry(pos, head, member)

 The hlist ones were greedy and wanted an extra parameter:

 hlist_for_each_entry(tpos, pos, head, member)

 Why did they need an extra pos parameter? I'm not quite sure. Not only
 they don't really need it, it also prevents the iterator from looking
 exactly like the list iterator, which is unfortunate.

 [..]
  170 files changed, 481 insertions(+), 879 deletions(-)

 Yes, beyond making hlist prettier, we also drop 400 lines. win-win?
 
 So this has been discussed before, and one of the problems with this
 is just the pain of maintenance. This tends to cause annoyances for
 merging, but also for -stable backporting etc, because it just results
 in a lot of noise.
 
 Now, the hlist_for_each() case isn't used by quite as many sites as
 some of the others helpers like this, so maybe the pain isn't horribly
 bad, but in general I do tend to get nervous about "let's clean it up"
 when it touches hundreds of files.
 
 Your thing looks nice in that it has the coccinelle script (which
 hopefully means that we really get them all), but just out of
 interest, how different is the patch after running the script on both
 
  (a) my current -git head
  (b) linux-next
 
 because differences (other than just line numbers) imply conflicts.
 How many differences are we talking about? None? Two? Twenty?
 
 (That said, right now linux-next is tiny. It might be more interesting
 to look at the linux-3.5 vs linux-3.6 to get more of a feel for
 differences between releases. Doing just the diff+grep thing, there's
 quite a few changes around hlist_for_each_entry() uses)

Instead of diffing diffs, I've just tried applying different versions
of the patch of different trees, and then looking at how many conflicts
happen as a result of that. I think it's probably a good indication of
how many conflicts this change would really cause.

Here are some stats:

 - Applying the patch from -next on top of your current git head
results in 3 conflicts.

 - Applying the patch from your current git head on top of v3.6 results
in 18 conflicts.

 - Applying the patch from 3.6 on top of 3.5 results in 25 conflicts.



Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v8 01/16] hashtable: introduce a small and naive hashtable

2012-11-01 Thread Sasha Levin

On Tue, Oct 30, 2012 at 10:23 PM, Linus Torvalds
torva...@linux-foundation.org wrote:
 On Tue, Oct 30, 2012 at 6:36 PM, Sasha Levin levinsasha...@gmail.com wrote:

 I can either rebase that on top of mainline, or we can ask maintainers
 to take it to their own trees if you take only 01/16 into mainline.
 What would you prefer?

 I don't really care deeply. The only reason to merge it now would be
 to avoid any pain with it during the next merge window. Just taking
 01/16 might be the sanest way to do that, then the rest can trickle in
 independently at their own leisure.

Okay, I'll keep working on converting everything else as soon as 01/16
makes it in your tree.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-11-02 Thread Sasha Levin

On Fri, Nov 2, 2012 at 11:51 AM, Jiri Slaby jsl...@suse.cz wrote:
 On 10/31/2012 04:59 PM, Sasha Levin wrote:
 So you probably want a lot more than 100k syscalls, why limit it at
 all actually?

 I unset the limit but I still can't reproduce...

 I've attached my .config for the guest kernel as reference.

 Even using this config does not help to reproduce that.

 Do you use some special trinity params?

Not really:

./trinity -m --quiet --dangerous -l off

Can I add something to my kernel to provide more info when it happens?


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-11-02 Thread Sasha Levin

On 11/02/2012 12:18 PM, Jiri Slaby wrote:
 On 11/02/2012 05:07 PM, Sasha Levin wrote:
 On Fri, Nov 2, 2012 at 11:51 AM, Jiri Slaby jsl...@suse.cz wrote:
 On 10/31/2012 04:59 PM, Sasha Levin wrote:
 So you probably want a lot more than 100k syscalls, why limit it at
 all actually?

 I unset the limit but I still can't reproduce...

 I've attached my .config for the guest kernel as reference.

 Even using this config does not help to reproduce that.

 Do you use some special trinity params?

 Not really:

 ./trinity -m --quiet --dangerous -l off
 
 Oh, you run that as root??

Yup, it runs inside a disposable VM.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] hlist: drop the node parameter from iterators

2012-11-02 Thread Sasha Levin

On 11/02/2012 04:16 PM, Linus Torvalds wrote:
 On Thu, Nov 1, 2012 at 7:26 PM, Sasha Levin sasha.le...@oracle.com wrote:

 Here are some stats:

  - Applying the patch from -next on top of your current git head
 results in 3 conflicts.

  - Applying the patch from your current git head on top of v3.6 results
 in 18 conflicts.

  - Applying the patch from 3.6 on top of 3.5 results in 25 conflicts.
 
 Ok, that sounds slightly painful, but not unmanageable.
 
 Can you send me the actual patch, and I'll mull on this a bit more? I
 could run the coccinelle script myself, but I assume (maybe
 incorrectly?) that there are manual fixes to clean some stuff up
 afterwards..

You're right about that. There are 2 things which need to be done after
running the script:

 - Coccinelle doesn't handle cases where there are multiple variables in
the declaration. That means that things like:

struct hlist_node *node, *tmp;

Have to be fixed manually.

That's common when hlist_for_each_entry_safe() is used afterwards.

 - Some places actually use node for their needs, while coccinelle can
automatically fix that to use '(ptr)->member', I'd rather go through those
places manually and make sure we're doing the right thing.


Do you want the patch on top of your git head, or on top of -next?


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] mm: add support for zsmalloc and zcache

2012-09-22 Thread Sasha Levin

On 09/21/2012 09:14 PM, Dan Magenheimer wrote:
 +#define MAX_CLIENTS 16
  
  Seems a bit arbitrary. Why 16?
 Sasha Levin posted a patch to fix this but it was tied in to
 the proposed KVM implementation, so was never merged.
 

My patch changed the max pools per client, not the maximum amount of clients.
That patch has already found its way in.

(MAX_CLIENTS does look like an arbitrary number though).


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] mm: add support for zsmalloc and zcache

2012-09-22 Thread Sasha Levin

On 09/22/2012 03:31 PM, Sasha Levin wrote:
 On 09/21/2012 09:14 PM, Dan Magenheimer wrote:
 +#define MAX_CLIENTS 16

 Seems a bit arbitrary. Why 16?
 Sasha Levin posted a patch to fix this but it was tied in to
 the proposed KVM implementation, so was never merged.

 
 My patch changed the max pools per client, not the maximum amount of clients.
 That patch has already found its way in.
 
 (MAX_CLIENTS does look like an arbitrary number though).

btw, while we're on the subject of KVM, the implementation of tmem/kvm was
blocked due to insufficient performance caused by the lack of multi-page
ops/batching.

Are there any plans to make it better in the future?


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: RCU idle CPU detection is broken in linux-next

2012-09-22 Thread Sasha Levin

On 09/22/2012 05:56 PM, Paul E. McKenney wrote:
 And now the prime suspect is the new CONFIG_RCU_USER_QS=y.  Do these
 warnings ever show up with CONFIG_RCU_USER_QS=n?

It seems that disabling that does make the warnings go away.

I'll keep the tests running in case it just reduces the chances or something
like that.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

blk: NULL ptr deref in blk_dequeue_request()

2012-09-22 Thread Sasha Levin

Hi all,

While fuzzing with trinity inside a KVM tools guest running the latest 
linux-next kernel, I've stumbled on the following BUG.

I've also hit a similar trace where the 'BUG_ON(ELV_ON_HASH(rq));' above that 
list_del_init() gets hit, so I guess it's a race
condition of some sort.


[9.900299] BUG: unable to handle kernel NULL pointer dereference at 
  (null)
[9.909508] IP: [819ea637] __list_del_entry+0xb7/0xe0
[9.910191] PGD 0
[9.910191] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
[9.910191] Dumping ftrace buffer:
[9.910191](ftrace buffer empty)
[9.910191] CPU 2
[9.910191] Pid: 3996, comm: kworker/u:2 Tainted: GW
3.6.0-rc6-next-20120921-sasha-1-geb77a39-dirty #3
[9.910191] RIP: 0010:[819ea637]  [819ea637] 
__list_del_entry+0xb7/0xe0
[9.910191] RSP: :880034e11c88  EFLAGS: 00010007
[9.910191] RAX:  RBX: 880034e3ec00 RCX: dead00200200
[9.910191] RDX:  RSI: 85366998 RDI: 880034e3ec00
[9.910191] RBP: 880034e11c88 R08:  R09: 88001af60928
[9.910191] R10:  R11: 0001 R12: 
[9.910191] R13: 85366360 R14:  R15: 85b4edd0
[9.910191] FS:  () GS:88002980() 
knlGS:
[9.910191] CS:  0010 DS:  ES:  CR0: 80050033
[9.910191] CR2:  CR3: 04c26000 CR4: 000406e0
[9.910191] DR0:  DR1:  DR2: 
[9.910191] DR3:  DR6: 0ff0 DR7: 0400
[9.910191] Process kworker/u:2 (pid: 3996, threadinfo 880034e1, 
task 88001af6)
[9.910191] Stack:
[9.910191]  880034e11ca8 819a1a45 880034e3ec00 

[9.910191]  880034e11cc8 819a1ae1  
880034e3ec00
[9.910191]  880034e11ce8 819a271e  

[9.910191] Call Trace:
[9.910191]  [819a1a45] blk_dequeue_request+0x35/0xc0
[9.910191]  [819a1ae1] blk_start_request+0x11/0x40
[9.910191]  [819a271e] blk_fetch_request+0x1e/0x30
[9.910191]  [81e5a89d] redo_fd_request+0x9d/0x3f0
[9.910191]  [8112a779] process_one_work+0x3b9/0x770
[9.910191]  [8112a628] ? process_one_work+0x268/0x770
[9.910191]  [81177a22] ? get_lock_stats+0x22/0x70
[9.910191]  [81e5a800] ? start_motor+0x120/0x120
[9.910191]  [8112b0fa] worker_thread+0x2ba/0x3f0
[9.910191]  [8112ae40] ? rescuer_thread+0x2d0/0x2d0
[9.910191]  [81135d83] kthread+0xe3/0xf0
[9.910191]  [81177aae] ? put_lock_stats.isra.16+0xe/0x40
[9.910191]  [81135ca0] ? insert_kthread_work+0x90/0x90
[9.910191]  [839f1e45] kernel_thread_helper+0x5/0x10
[9.910191]  [81135ca0] ? insert_kthread_work+0x90/0x90
[9.910191] Code: 6a 84 be 3e 00 00 00 48 c7 c7 7b d8 6a 84 31 c0 e8 8f c2 
71 ff eb 2c 0f 1f 44 00 00 48 b9 00 02 20 00 00 00
ad de 48 39 c8 74 8c 4c 8b 00 4c 39 c7 75 a6 4c 8b 42 08 4c 39 c7 75 bc 48 89 
42 08
[9.910191] RIP  [819ea637] __list_del_entry+0xb7/0xe0
[9.910191]  RSP 880034e11c88
[9.910191] CR2: 


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: RCU idle CPU detection is broken in linux-next

2012-09-22 Thread Sasha Levin

On 09/23/2012 02:21 AM, Paul E. McKenney wrote:
 On Sat, Sep 22, 2012 at 02:27:35PM -0700, Paul E. McKenney wrote:
 On Sat, Sep 22, 2012 at 07:50:29PM +0200, Sasha Levin wrote:
 On 09/22/2012 05:56 PM, Paul E. McKenney wrote:
 And now the prime suspect is the new CONFIG_RCU_USER_QS=y.  Do these
 warnings ever show up with CONFIG_RCU_USER_QS=n?

 It seems that disabling that does make the warnings go away.

 I'll keep the tests running in case it just reduces the chances or something
 like that.

 Thank you for testing this!

 And of course the reason that I didn't see these problems is that I
 failed to update my tests to enable CONFIG_RCU_USER_QS.  :-/
 
 Also the fact that I run 32-bit guests on x86.  Sigh!
 
 I take it that you are running 64-bit guests?

Yes, that's correct.


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: RCU idle CPU detection is broken in linux-next

2012-09-24 Thread Sasha Levin

Hi Frederic,

On 09/24/2012 11:29 PM, Frederic Weisbecker wrote:
 Sasha,
 
 Can you please test the following branch:
 
 git://github.com/fweisbec/linux-dynticks.git  rcu/idle-for-v3.7-take3
 
 with CONFIG_RCU_USER_QS and CONFIG_RCU_USER_QS_FORCE enabled.
 
 I hope this fixes the warning.
 The changes are:
 
 * add x86: Unspaghettize do_general_protection()
 * updated x86: Exception hooks for userspace RCU extended QS to
 handle some missed trap handlers. Especially do_general_protection()
 because I can see the problem triggered there in Sasha's warnings. I
 fixed more handlers in the way.

I've tested the branch above, and noticed two things:

 - I had merge conflicts when pulling it on top of latest linux-next which I've 
resolved:

diff --cc arch/x86/kernel/traps.c
index 1ba4850,cb20776..386b079
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@@ -644,8 -646,7 +647,9 @@@ EXPORT_SYMBOL_GPL(math_state_restore)
  dotraplinkage void __kprobes
  do_device_not_available(struct pt_regs *regs, long error_code)
  {
 +  BUG_ON(use_eager_fpu());
 +
+   exception_enter(regs);
  #ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };

 - While I no longer see the warnings I've originally noticed, if I run with 
Paul's last debug patch I see the following warning:

[  212.200137] WARNING: at arch/x86/kernel/process.c:392 cpu_idle+0x12a/0x1f0()
[  212.200141] Pid: 0, comm: swapper/3 Tainted: GW
3.6.0-rc6-next-20120924-sasha-00030-g71f256c #5
[  212.200142] Call Trace:
[  212.200146]  [810799da] ? cpu_idle+0x12a/0x1f0
[  212.200150]  [811078b6] warn_slowpath_common+0x86/0xb0
[  212.200153]  [811079a5] warn_slowpath_null+0x15/0x20
[  212.200156]  [810799da] cpu_idle+0x12a/0x1f0
[  212.200160]  [839a3ee8] ? setup_APIC_timer+0xb2/0xb6

Which is triggered by:
if (cpuidle_idle_call())
pm_idle();

rcu_idle_exit();
WARN_ON(rcu_is_cpu_idle());  THIS
start_critical_timings();

/* In many cases the interrupt that ended idle
   has already called exit_idle. But some idle
   loops can be woken up without interrupt. */
WARN_ON(rcu_is_cpu_idle());
__exit_idle();
WARN_ON(rcu_is_cpu_idle());


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: RCU idle CPU detection is broken in linux-next

2012-09-24 Thread Sasha Levin

On 09/25/2012 12:47 AM, Sasha Levin wrote:
  - While I no longer see the warnings I've originally noticed, if I run with 
 Paul's last debug patch I see the following warning:

Correction: Original warnings are still there, they just got buried in the huge 
spew that was caused by additional debug warnings
so I've missed them initially.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

tty ldisc lockups in linux-next

2012-09-25 Thread Sasha Levin

Hi all,

While fuzzing with trinity in a KVM tools guest running linux-next kernel, I 
keep hitting the following lockup:

[  842.780242] INFO: task init:1 blocked for more than 120 seconds.
[  842.780732] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this 
message.
[  842.781559] initD 88000d5b  3344 1  0 0x0002
[  842.783226]  88000d5adc28 0082 88000d5adbe8 
81150ac5
[  842.784714]  88000d5adfd8 88000d5adfd8 88000d5adfd8 
88000d5adfd8
[  842.785737]  84e2e420 88000d5b 88000d5b08f0 
7fff
[  842.786764] Call Trace:
[  842.787102]  [81150ac5] ? sched_clock_local+0x25/0xa0
[  842.787858]  [83a0be45] schedule+0x55/0x60
[  842.788511]  [83a09dd5] schedule_timeout+0x45/0x360
[  842.789251]  [83a0d54d] ? _raw_spin_unlock_irqrestore+0x5d/0xb0
[  842.790149]  [8117b13d] ? trace_hardirqs_on+0xd/0x10
[  842.790594]  [83a0d574] ? _raw_spin_unlock_irqrestore+0x84/0xb0
[  842.791096]  [81137af7] ? prepare_to_wait+0x77/0x90
[  842.791535]  [81b9b2c6] tty_ldisc_wait_idle.isra.7+0x76/0xb0
[  842.792016]  [81137cd0] ? abort_exclusive_wait+0xb0/0xb0
[  842.792490]  [81b9c03b] tty_ldisc_hangup+0x1cb/0x320
[  842.792924]  [81b933a2] ? __tty_hangup+0x122/0x430
[  842.793364]  [81b933aa] __tty_hangup+0x12a/0x430
[  842.794077]  [83a0d574] ? _raw_spin_unlock_irqrestore+0x84/0xb0
[  842.794942]  [81b955cc] disassociate_ctty+0x6c/0x230
[  842.795693]  [8110e7e8] do_exit+0x3d8/0xa90
[  842.796361]  [83a0e4d9] ? retint_swapgs+0x13/0x1b
[  842.797079]  [8110ef64] do_group_exit+0x84/0xd0
[  842.797818]  [8110efc2] sys_exit_group+0x12/0x20
[  842.798524]  [83a0edcd] system_call_fastpath+0x1a/0x1f
[  842.799294] 1 lock held by init/1:
[  842.799734]  #0:  (tty-ldisc_mutex){+.+.+.}, at: [81b9bf92] 
tty_ldisc_hangup+0x122/0x320


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: tty ldisc lockups in linux-next

2012-09-25 Thread Sasha Levin

On 09/25/2012 10:52 AM, Jiri Slaby wrote:
 Hi all,
  
  While fuzzing with trinity in a KVM tools guest running linux-next kernel, 
  I keep hitting the following lockup:
 Hi, I'm confused here. Is this different to what you reported a couple
 days ago? Doesn't reverting aa3c8af86382 help in the end?

I was just about to send a reply to that mail saying that while reverting 
aa3c8af86382 reduces the odds for seeing it, it still
happens. You were faster than me :)

But yes, it still happens even if I revert aa3c8af86382 or try applying your 
patch in that thread.


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: tty ldisc lockups in linux-next

2012-09-25 Thread Sasha Levin

On 09/25/2012 10:56 AM, Jiri Slaby wrote:
 On 09/25/2012 10:55 AM, Sasha Levin wrote:
 On 09/25/2012 10:52 AM, Jiri Slaby wrote:
 Hi all,

 While fuzzing with trinity in a KVM tools guest running linux-next 
 kernel, I keep hitting the following lockup:
 Hi, I'm confused here. Is this different to what you reported a couple
 days ago? Doesn't reverting aa3c8af86382 help in the end?

 I was just about to send a reply to that mail saying that while reverting 
 aa3c8af86382 reduces the odds for seeing it, it still
 happens. You were faster than me :)

 But yes, it still happens even if I revert aa3c8af86382 or try applying your 
 patch in that thread.
 
 The patch won't help, it's kind of certain.
 
 Instead I still wonder what process sits on the terminal. Could you
 investigate?
 

It looks like sh is trying to read:

[  606.950194] sh  S 0001  4800  6260  1 0x
[  606.950194]  88000c0ddcc8 0082 847baa68 
0b02
[  606.950194]  88000c0ddfd8 88000c0ddfd8 88000c0ddfd8 
88000c0ddfd8
[  606.950194]  88000f578000 88000c0bb000 88000c0ddd98 
880040b4d000
[  606.950194] Call Trace:
[  606.950194]  [83a0be45] schedule+0x55/0x60
[  606.950194]  [83a09dd5] schedule_timeout+0x45/0x360
[  606.950194]  [83a0d54d] ? _raw_spin_unlock_irqrestore+0x5d/0xb0
[  606.950194]  [8117b13d] ? trace_hardirqs_on+0xd/0x10
[  606.950194]  [83a0d574] ? _raw_spin_unlock_irqrestore+0x84/0xb0
[  606.950194]  [81b98271] n_tty_read+0x4c1/0x9a0
[  606.950194]  [83a0d54d] ? _raw_spin_unlock_irqrestore+0x5d/0xb0
[  606.950194]  [8114d760] ? try_to_wake_up+0x360/0x360
[  606.950194]  [81b922cf] tty_read+0x8f/0x100
[  606.950194]  [8127187d] vfs_read+0xad/0x180
[  606.950194]  [81271c10] sys_read+0x50/0xa0
[  606.950194]  [83a0edcd] system_call_fastpath+0x1a/0x1f

While init is trying to exit:

[  605.524940] initD 88000d5b  3376 1  0 0x0002
[  605.527502]  88000d5adc28 0082 88000d5adbe8 
81150ac5
[  605.529685]  88000d5adfd8 88000d5adfd8 88000d5adfd8 
88000d5adfd8
[  605.530939]  88000d613000 88000d5b 88000d5b08f0 
7fff
[  605.532064] Call Trace:
[  605.532064]  [81150ac5] ? sched_clock_local+0x25/0xa0
[  605.532064]  [83a0be45] schedule+0x55/0x60
[  605.532064]  [83a09dd5] schedule_timeout+0x45/0x360
[  605.532064]  [83a0d54d] ? _raw_spin_unlock_irqrestore+0x5d/0xb0
[  605.532064]  [8117b13d] ? trace_hardirqs_on+0xd/0x10
[  605.532064]  [83a0d574] ? _raw_spin_unlock_irqrestore+0x84/0xb0
[  605.532064]  [81137af7] ? prepare_to_wait+0x77/0x90
[  605.532064]  [81b9b2c6] tty_ldisc_wait_idle.isra.7+0x76/0xb0
[  605.532064]  [81137cd0] ? abort_exclusive_wait+0xb0/0xb0
[  605.532064]  [81b9c03b] tty_ldisc_hangup+0x1cb/0x320
[  605.532064]  [81b933a2] ? __tty_hangup+0x122/0x430
[  605.532064]  [81b933aa] __tty_hangup+0x12a/0x430
[  605.532064]  [83a0d574] ? _raw_spin_unlock_irqrestore+0x84/0xb0
[  605.532064]  [81b955cc] disassociate_ctty+0x6c/0x230
[  605.532064]  [8110e7e8] do_exit+0x3d8/0xa90
[  605.532064]  [83a0e4d9] ? retint_swapgs+0x13/0x1b
[  605.532064]  [8110ef64] do_group_exit+0x84/0xd0
[  605.532064]  [8110efc2] sys_exit_group+0x12/0x20
[  605.532064]  [83a0edcd] system_call_fastpath+0x1a/0x1f

And the corresponding lock info:

[  606.950194] Showing all locks held in the system:
[  606.950194] 1 lock held by init/1:
[  606.950194]  #0:  (tty-ldisc_mutex){+.+.+.}, at: [81b9bf92] 
tty_ldisc_hangup+0x122/0x320
[  606.950194] 1 lock held by sh/6260:
[  606.950194]  #0:  (tty-atomic_read_lock){+.+...}, at: [81b98078] 
n_tty_read+0x2c8/0x9a0

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: RCU idle CPU detection is broken in linux-next

2012-09-25 Thread Sasha Levin

On 09/25/2012 02:06 PM, Frederic Weisbecker wrote:
 Sasha, sorry to burden you with more testing request.
 Could you please try out this new branch? It includes some fixes after Wu 
 Fenguang and
 Dan Carpenter reports (not related to your warnings though) and a patch on 
 the top
 of the pile to ensure I diagnosed well the problem, which return immediately 
 from
 rcu_user_*() APIs if we are in an interrupt.
 
 This way we'll have a clearer view. I also would like to know if there are 
 other
 problems with the rcu user mode.
 
 Thanks!

Alrighty, I don't see any warnings anymore.

I'll keep everything running just in case.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v6] hashtable: introduce a small and naive hashtable

2012-09-26 Thread Sasha Levin

This hashtable implementation is using hlist buckets to provide a simple
hashtable to prevent it from getting reimplemented all over the kernel.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---

Changes since v5:

 - Fix hash_init.
 - Clarify this implementation deals with statically allocated hashtables only.

 include/linux/hashtable.h | 190 ++
 1 file changed, 190 insertions(+)
 create mode 100644 include/linux/hashtable.h

diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
new file mode 100644
index 000..195173e
--- /dev/null
+++ b/include/linux/hashtable.h
@@ -0,0 +1,190 @@
+/*
+ * Statically sized hash table implementation
+ * (C) 2012  Sasha Levin levinsasha...@gmail.com
+ */
+
+#ifndef _LINUX_HASHTABLE_H
+#define _LINUX_HASHTABLE_H
+
+#include linux/list.h
+#include linux/types.h
+#include linux/kernel.h
+#include linux/hash.h
+#include linux/rculist.h
+
+#define DEFINE_HASHTABLE(name, bits)   
\
+   struct hlist_head name[HASH_SIZE(bits)] =   
\
+   { [0 ... HASH_SIZE(bits) - 1] = HLIST_HEAD_INIT }
+
+#define DECLARE_HASHTABLE(name, bits)  
\
+   struct hlist_head name[1  (bits)]
+
+#define HASH_SIZE(name) (1  HASH_BITS(name))
+#define HASH_BITS(name) ilog2(ARRAY_SIZE(name))
+
+/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. 
*/
+#define hash_min(val, bits)
\
+({ 
\
+   sizeof(val) = 4 ?  
\
+   hash_32(val, bits) :
\
+   hash_long(val, bits);   
\
+})
+
+/**
+ * hash_init - initialize a hash table
+ * @hashtable: hashtable to be initialized
+ *
+ * Calculates the size of the hashtable from the given parameter, otherwise
+ * same as hash_init_size.
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_init(hashtable)   
\
+({ 
\
+   int __i;
\
+   
\
+   for (__i = 0; __i  HASH_SIZE(hashtable); __i++)
\
+   INIT_HLIST_HEAD(hashtable[__i]);   
\
+})
+
+/**
+ * hash_add - add an object to a hashtable
+ * @hashtable: hashtable to add to
+ * @node: the struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add(hashtable, node, key) 
\
+   hlist_add_head(node, hashtable[hash_min(key, HASH_BITS(hashtable))]);
+
+/**
+ * hash_add_rcu - add an object to a rcu enabled hashtable
+ * @hashtable: hashtable to add to
+ * @node: the struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add_rcu(hashtable, node, key) 
\
+   hlist_add_head_rcu(node, hashtable[hash_min(key, 
HASH_BITS(hashtable))]);
+
+/**
+ * hash_hashed - check whether an object is in any hashtable
+ * @node: the struct hlist_node of the object to be checked
+ */
+#define hash_hashed(node) (!hlist_unhashed(node))
+
+/**
+ * hash_empty - check whether a hashtable is empty
+ * @hashtable: hashtable to check
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_empty(hashtable)  
\
+({ 
\
+   int __i;
\
+   bool __ret = true;  
\
+   
\
+   for (__i = 0; __i  HASH_SIZE(hashtable); __i++)
\
+   if (!hlist_empty(hashtable[__i]))  
\
+   __ret = false;  
\
+   
\
+   __ret;  
\
+})
+
+/**
+ * hash_del - remove an object from a hashtable
+ * @node: struct hlist_node of the object to remove
+ */
+static inline void hash_del(struct hlist_node *node)
+{
+   hlist_del_init(node);
+}
+
+/**
+ * hash_del_rcu - remove an object from a rcu enabled

Re: [PATCH v6] hashtable: introduce a small and naive hashtable

2012-09-26 Thread Sasha Levin

On 09/26/2012 03:59 PM, Steven Rostedt wrote:
 On Wed, 2012-09-26 at 14:45 +0100, David Laight wrote:
 Amazing how something simple gets lots of comments and versions :-)

 ...
 + * This has to be a macro since HASH_BITS() will not work on pointers since
 + * it calculates the size during preprocessing.
 + */
 +#define hash_empty(hashtable)  
 \
 +({ 
 \
 +   int __i;
 \
 +   bool __ret = true;  
 \
 +   
 \
 +   for (__i = 0; __i  HASH_SIZE(hashtable); __i++)
 \
 +   if (!hlist_empty(hashtable[__i]))  
 \
 +   __ret = false;  
 \
 +   
 \
 +   __ret;  
 \
 +})

 Actually you could have a #define that calls a function
 passing in the address and size.
 
 Probably would be cleaner to do so.

I think it would be worth it if it were more complex than a simple loop. We were doing 
a similar thing with the _size() functions (see
version 4 of this patch), but decided to remove it since it was becoming too 
complex.
 
 
 Also, should the loop have a 'break' in it?
 
 Yeah it should, and could do:
 
   for (i = 0; i  HASH_SIZE(hashtable); i++)
   if (!hlist_empty(hashtable[i]))
   break;
 
   return i  HASH_SIZE(hashtable);

Right.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v6] hashtable: introduce a small and naive hashtable

2012-09-27 Thread Sasha Levin

On 09/27/2012 10:25 AM, David Laight wrote:
 And even then, if we would do:

for (i = 0; i < HASH_SIZE(hashtable); i++)
if (!hlist_empty(&hashtable[i]))
break;

return i >= HASH_SIZE(hashtable);

 What happens if the last entry of the table is non-empty ?

 It still works, as 'i' is not incremented due to the break. And i will
 still be less than HASH_SIZE(hashtable). Did you have *your* cup of
 coffee today? ;-)

 Ahh, right! Actually I had it already ;-)
 
 I tend to dislike the repeated test, gcc might be able to optimise
 it away, but the code is cleaner written as:
 
   for (i = 0; i  HASH_SIZE(hashtable); i++)
   if (!hlist_empty(hashtable[i]))
   return false;
   return true;

Right, the flag thing in the macro was there just to make it work properly as a 
macro.

 Agreed that the flags should be removed. Moving to define + static
 inline is still important though.
 
 Not sure I'd bother making the function inline.

I usually never make anything 'inline', I just let gcc do its own thing when 
it compiles the code. If there are any objections
please let me know before I send the new version.


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] fs: prevent use after free in auditing when symlink following was denied

2012-10-04 Thread Sasha Levin

Commit "fs: add link restriction audit reporting" has added auditing of failed
attempts to follow symlinks. Unfortunately, the auditing was being done after
the struct path structure was released earlier.

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 fs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/namei.c b/fs/namei.c
index aa30d19..6d47fac 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -692,9 +692,9 @@ static inline int may_follow_link(struct path *link, struct 
nameidata *nd)
if (uid_eq(parent-i_uid, inode-i_uid))
return 0;
 
+   audit_log_link_denied(follow_link, link);
path_put_conditional(link, nd);
path_put(nd-path);
-   audit_log_link_denied(follow_link, link);
return -EACCES;
 }
 
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] fs: handle failed audit_log_start properly

2012-10-04 Thread Sasha Levin

audit_log_start() may return NULL, this is unchecked by the caller in
audit_log_link_denied() and could cause a NULL ptr deref.

Introduced by commit a51d9eaa (fs: add link restriction audit reporting).

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 kernel/audit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/audit.c b/kernel/audit.c
index 4d0ceed..40414e9 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1440,6 +1440,8 @@ void audit_log_link_denied(const char *operation, struct 
path *link)
 
ab = audit_log_start(current-audit_context, GFP_KERNEL,
 AUDIT_ANOM_LINK);
+   if (!ab)
+   return;
audit_log_format(ab, op=%s action=denied, operation);
audit_log_format(ab,  pid=%d comm=, current-pid);
audit_log_untrustedstring(ab, current-comm);
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] net, bluetooth: don't attempt to free a channel that wasn't created

2012-10-04 Thread Sasha Levin

We may currently attempt to free a channel which wasn't created due to
an error in the initialization path, this would cause a NULL ptr deref.

Introduced in commit 61d6ef3e (Bluetooth: Make better use of l2cap_chan
reference counting).

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 net/bluetooth/l2cap_sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 083f2bf..66c295a 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1083,7 +1083,8 @@ static void l2cap_sock_destruct(struct sock *sk)
 {
BT_DBG(sk %p, sk);
 
-   l2cap_chan_put(l2cap_pi(sk)-chan);
+   if (l2cap_pi(sk)-chan)
+   l2cap_chan_put(l2cap_pi(sk)-chan);
if (l2cap_pi(sk)-rx_busy_skb) {
kfree_skb(l2cap_pi(sk)-rx_busy_skb);
l2cap_pi(sk)-rx_busy_skb = NULL;
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] net, TTY: initialize tty-driver_data before usage

2012-10-04 Thread Sasha Levin

Commit 9c650ffc (TTY: ircomm_tty, add tty install) split _open() to
_install() and _open(). It also moved the initialization of driver_data
out of open(), but never added it to install() - causing a NULL ptr
deref whenever the driver was used.

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 net/irda/ircomm/ircomm_tty.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 95a3a7a..496ce2c 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -421,6 +421,8 @@ static int ircomm_tty_install(struct tty_driver *driver, 
struct tty_struct *tty)
hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL);
}
 
+   tty-driver_data = self;
+
return tty_port_install(self-port, driver, tty);
 }
 
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] net, bluetooth: don't attempt to free a channel that wasn't created

2012-10-05 Thread Sasha Levin

On 10/05/2012 06:22 AM, Andrei Emeltchenko wrote:
 Hi Sasha,
 
 On Thu, Oct 04, 2012 at 07:59:57PM -0400, Sasha Levin wrote:
 We may currently attempt to free a channel which wasn't created due to
 an error in the initialization path, this would cause a NULL ptr deref.
 
 Please put oops dump here.

[   12.919073] BUG: unable to handle kernel NULL pointer dereference at 
0010
[   12.919131] IP: [836645c4] l2cap_chan_put+0x34/0x50
[   12.919135] PGD 0
[   12.919138] Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[   12.919193] Dumping ftrace buffer:
[   12.919242](ftrace buffer empty)
[   12.919314] Modules linked in:
[   12.919318] CPU 1
[   12.919319] Pid: 6210, comm: krfcommd Tainted: GW
3.6.0-next-20121004-sasha-5-gb010653-dirty #30
[   12.919374] RIP: 0010:[836645c4]  [836645c4] 
l2cap_chan_put+0x34/0x50
[   12.919377] RSP: :880066933c38  EFLAGS: 00010246
[   12.919378] RAX: 8366c780 RBX:  RCX: 6667
[   12.919379] RDX: 0fa0 RSI: 84d3f79e RDI: 0010
[   12.919381] RBP: 880066933c48 R08: 859989f8 R09: 0001
[   12.919382] R10:  R11: 7fff R12: 
[   12.919383] R13: 88009b00a200 R14: 88009b00a200 R15: 0001
[   12.919385] FS:  () GS:88003360() 
knlGS:
[   12.919437] CS:  0010 DS:  ES:  CR0: 80050033
[   12.919440] CR2: 0010 CR3: 05026000 CR4: 000406e0
[   12.919446] DR0:  DR1:  DR2: 
[   12.919451] DR3:  DR6: 0ff0 DR7: 0400
[   12.919504] Process krfcommd (pid: 6210, threadinfo 880066932000, task 
880065c4b000)
[   12.919506] Stack:
[   12.919510]  88009b00a200 880032084000 880066933c68 
8366c7bc
[   12.919513]  7fff 880032084000 880066933c98 
833ae0ae
[   12.919516]  880066933ca8   
88009b00a200
[   12.919517] Call Trace:
[   12.919522]  [8366c7bc] l2cap_sock_destruct+0x3c/0x80
[   12.919527]  [833ae0ae] __sk_free+0x1e/0x1f0
[   12.919530]  [833ae2f7] sk_free+0x17/0x20
[   12.919585]  [8366ca4e] l2cap_sock_alloc.constprop.5+0x9e/0xd0
[   12.919591]  [8366cb9e] l2cap_sock_create+0x7e/0x100
[   12.919652]  [83a4f32a] ? _raw_read_lock+0x6a/0x80
[   12.919658]  [836402c4] ? bt_sock_create+0x74/0x110
[   12.919660]  [83640308] bt_sock_create+0xb8/0x110
[   12.919664]  [833aa232] __sock_create+0x282/0x3b0
[   12.919720]  [833aa0b0] ? __sock_create+0x100/0x3b0
[   12.919725]  [836785b0] ? rfcomm_process_sessions+0x17e0/0x17e0
[   12.919779]  [833aa37f] sock_create_kern+0x1f/0x30
[   12.919784]  [83675714] rfcomm_l2sock_create+0x44/0x70
[   12.919787]  [836785b0] ? rfcomm_process_sessions+0x17e0/0x17e0
[   12.919790]  [836785fe] rfcomm_run+0x4e/0x1f0
[   12.919846]  [836785b0] ? rfcomm_process_sessions+0x17e0/0x17e0
[   12.919852]  [81138ee3] kthread+0xe3/0xf0
[   12.919908]  [8117b12e] ? put_lock_stats.isra.14+0xe/0x40
[   12.919914]  [81138e00] ? flush_kthread_work+0x1f0/0x1f0
[   12.919968]  [83a5077c] ret_from_fork+0x7c/0x90
[   12.919973]  [81138e00] ? flush_kthread_work+0x1f0/0x1f0
[   12.920161] Code: 83 ec 08 f6 05 ff 58 44 02 04 74 1b 8b 4f 10 48 89 fa 48 
c7 c6 d9 d7 d4 84 48 c7 c7 80 9e aa 85 31 c0 e8 80
ac 3a fe 48 8d 7b 10 f0 83 6b 10 01 0f 94 c0 84 c0 74 05 e8 8b e0 ff ff 48 83 
c4 08
[   12.920165] RIP  [836645c4] l2cap_chan_put+0x34/0x50
[   12.920166]  RSP 880066933c38
[   12.920167] CR2: 0010
[   12.920417] ---[ end trace 5a9114e8a158ab84 ]---

 
 Introduced in commit 61d6ef3e (Bluetooth: Make better use of l2cap_chan
 reference counting).

 Signed-off-by: Sasha Levin sasha.le...@oracle.com
 ---
  net/bluetooth/l2cap_sock.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
 index 083f2bf..66c295a 100644
 --- a/net/bluetooth/l2cap_sock.c
 +++ b/net/bluetooth/l2cap_sock.c
 @@ -1083,7 +1083,8 @@ static void l2cap_sock_destruct(struct sock *sk)
  {
  BT_DBG(sk %p, sk);
  
 -l2cap_chan_put(l2cap_pi(sk)-chan);
 +if (l2cap_pi(sk)-chan)
 +l2cap_chan_put(l2cap_pi(sk)-chan);
 
 This does not look right, I suppose you want to put somewhere missing
 chan_hold

The issue is basically kzalloc() failing in l2cap_chan_create(), this would 
lead to sk_free()
getting called with chan being NULL, which is why I don't think that chan_hold 
is relevant
at this stage.


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http

vfs: oops on open_by_handle_at() in linux-next

2012-10-07 Thread Sasha Levin

Hi all,

While fuzzing with trinity inside a KVM tools guest using latest linux-next, 
I've stumbled on the following:

[   74.082463] BUG: unable to handle kernel paging request at 880061cd3000
[   74.087481] IP: [812190d0] shmem_alloc_inode+0x40/0x40
[   74.090032] PGD 4e27063 PUD 1fb7b067 PMD 1fc8a067 PTE 800061cd3160
[   74.090032] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
[   74.090032] Dumping ftrace buffer:
[   74.090032](ftrace buffer empty)
[   74.090032] CPU 1
[   74.090032] Pid: 7234, comm: trinity-child40 Tainted: GW
3.6.0-next-20121005-sasha-1-g1eae105-dirty #34
[   74.090032] RIP: 0010:[812190d0]  [812190d0] 
shmem_alloc_inode+0x40/0x40
[   74.109655] RSP: 0018:8800268efd90  EFLAGS: 00010282
[   74.109655] RAX: 812190d0 RBX: 8800663d8a20 RCX: 
[   74.109655] RDX: 0001 RSI: 880061cd2ff8 RDI: 8800332a9000
[   74.109655] RBP: 8800268efef8 R08: 812d7450 R09: 
[   74.109655] R10: 0001 R11:  R12: 83c48340
[   74.109655] R13: 0001 R14:  R15: 
[   74.127365] ircomm_tty_close()
[   74.127345] FS:  7fbf37c56700() GS:88003360() 
knlGS:
[   74.127345] CS:  0010 DS:  ES:  CR0: 80050033
[   74.127345] CR2: 880061cd3000 CR3: 262c4000 CR4: 000406e0
[   74.127345] DR0:  DR1:  DR2: 
[   74.127345] DR3:  DR6: 0ff0 DR7: 0400
[   74.127345] Process trinity-child40 (pid: 7234, threadinfo 8800268ee000, 
task 880026208000)
[   74.127345] Stack:
[   74.127345]  81488649 85f2e7b0  
812d7450
[   74.127345]  880061cd2ff8 8800268efdc8 8117a23e 
8800268efde8
[   74.127345]  8117ac46 8800261d1108 880026208000 
8800268efe88
[   74.127345] Call Trace:
[   74.127345]  [81488649] ? exportfs_decode_fh+0x79/0x2d0
[   74.127345]  [812d7450] ? dump_seek+0xf0/0xf0
[   74.127345]  [8117a23e] ? put_lock_stats.isra.16+0xe/0x40
[   74.127345]  [8117ac46] ? lock_release_holdtime+0x126/0x140
[   74.127345]  [8117fbfe] ? lock_release_non_nested+0xde/0x310
[   74.127345]  [83a5d914] ? _raw_spin_unlock_irqrestore+0x84/0xb0
[   74.127345]  [812d77c3] do_handle_open+0x163/0x2c0
[   74.127345]  [812d792c] sys_open_by_handle_at+0xc/0x10
[   74.127345]  [83a5f3f8] tracesys+0xe1/0xe6
[   74.127345] Code: 48 85 c0 74 0e 48 05 78 01 00 00 eb 0e 66 0f 1f 44 00 00 
31 c0 66 0f 1f 44 00 00 5d c3 66 66 66 66 66 2e 0f
1f 84 00 00 00 00 00 8b 46 08 48 89 f1 8b 76 04 48 c1 e0 20 48 09 f0 83 fa 02 
7e 46
[   74.127345] RIP  [812190d0] shmem_alloc_inode+0x40/0x40
[   74.127345]  RSP 8800268efd90
[   74.127345] CR2: 880061cd3000
[   74.127345] ---[ end trace 60d7f664788c4cb8 ]---



 # addr2line -i -e vmlinux 812d792c
/usr/src/linux/fs/fhandle.c:265
 # addr2line -i -e vmlinux 812d77c3
/usr/src/linux/fs/fhandle.c:155
/usr/src/linux/fs/fhandle.c:205
/usr/src/linux/fs/fhandle.c:221
 # addr2line -i -e vmlinux 81488649
/usr/src/linux/fs/exportfs/expfs.c:385
 # addr2line -i -e vmlinux 812190d0
/usr/src/linux/mm/shmem.c:2224


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: blk: NULL ptr deref in blk_dequeue_request()

2012-10-07 Thread Sasha Levin

Ping?

I'm still seeing this on linux-next.

On Sat, Sep 22, 2012 at 4:35 PM, Sasha Levin levinsasha...@gmail.com wrote:
 Hi all,

 While fuzzing with trinity inside a KVM tools guest running the latest 
 linux-next kernel, I've stumbled on the following BUG.

 I've also hit a similar trace where the 'BUG_ON(ELV_ON_HASH(rq));' above that 
 list_del_init() gets hit, so I guess it's a race
 condition of some sorts.


 [9.900299] BUG: unable to handle kernel NULL pointer dereference at   
 (null)
 [9.909508] IP: [819ea637] __list_del_entry+0xb7/0xe0
 [9.910191] PGD 0
 [9.910191] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 [9.910191] Dumping ftrace buffer:
 [9.910191](ftrace buffer empty)
 [9.910191] CPU 2
 [9.910191] Pid: 3996, comm: kworker/u:2 Tainted: GW
 3.6.0-rc6-next-20120921-sasha-1-geb77a39-dirty #3
 [9.910191] RIP: 0010:[819ea637]  [819ea637] 
 __list_del_entry+0xb7/0xe0
 [9.910191] RSP: :880034e11c88  EFLAGS: 00010007
 [9.910191] RAX:  RBX: 880034e3ec00 RCX: 
 dead00200200
 [9.910191] RDX:  RSI: 85366998 RDI: 
 880034e3ec00
 [9.910191] RBP: 880034e11c88 R08:  R09: 
 88001af60928
 [9.910191] R10:  R11: 0001 R12: 
 
 [9.910191] R13: 85366360 R14:  R15: 
 85b4edd0
 [9.910191] FS:  () GS:88002980() 
 knlGS:
 [9.910191] CS:  0010 DS:  ES:  CR0: 80050033
 [9.910191] CR2:  CR3: 04c26000 CR4: 
 000406e0
 [9.910191] DR0:  DR1:  DR2: 
 
 [9.910191] DR3:  DR6: 0ff0 DR7: 
 0400
 [9.910191] Process kworker/u:2 (pid: 3996, threadinfo 880034e1, 
 task 88001af6)
 [9.910191] Stack:
 [9.910191]  880034e11ca8 819a1a45 880034e3ec00 
 
 [9.910191]  880034e11cc8 819a1ae1  
 880034e3ec00
 [9.910191]  880034e11ce8 819a271e  
 
 [9.910191] Call Trace:
 [9.910191]  [819a1a45] blk_dequeue_request+0x35/0xc0
 [9.910191]  [819a1ae1] blk_start_request+0x11/0x40
 [9.910191]  [819a271e] blk_fetch_request+0x1e/0x30
 [9.910191]  [81e5a89d] redo_fd_request+0x9d/0x3f0
 [9.910191]  [8112a779] process_one_work+0x3b9/0x770
 [9.910191]  [8112a628] ? process_one_work+0x268/0x770
 [9.910191]  [81177a22] ? get_lock_stats+0x22/0x70
 [9.910191]  [81e5a800] ? start_motor+0x120/0x120
 [9.910191]  [8112b0fa] worker_thread+0x2ba/0x3f0
 [9.910191]  [8112ae40] ? rescuer_thread+0x2d0/0x2d0
 [9.910191]  [81135d83] kthread+0xe3/0xf0
 [9.910191]  [81177aae] ? put_lock_stats.isra.16+0xe/0x40
 [9.910191]  [81135ca0] ? insert_kthread_work+0x90/0x90
 [9.910191]  [839f1e45] kernel_thread_helper+0x5/0x10
 [9.910191]  [81135ca0] ? insert_kthread_work+0x90/0x90
 [9.910191] Code: 6a 84 be 3e 00 00 00 48 c7 c7 7b d8 6a 84 31 c0 e8 8f c2 
 71 ff eb 2c 0f 1f 44 00 00 48 b9 00 02 20 00 00 00
 ad de 48 39 c8 74 8c 4c 8b 00 4c 39 c7 75 a6 4c 8b 42 08 4c 39 c7 75 bc 48 
 89 42 08
 [9.910191] RIP  [819ea637] __list_del_entry+0xb7/0xe0
 [9.910191]  RSP 880034e11c88
 [9.910191] CR2: 


 Thanks,
 Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-11-02 Thread Sasha Levin

On 11/02/2012 12:18 PM, Jiri Slaby wrote:
 On 11/02/2012 05:07 PM, Sasha Levin wrote:
 On Fri, Nov 2, 2012 at 11:51 AM, Jiri Slaby jsl...@suse.cz wrote:
 On 10/31/2012 04:59 PM, Sasha Levin wrote:
 So you probably want a lot more than 100k syscalls, why limit it at
 all actually?

 I unset the limit but I still can't reproduce...

 I've attached my .config for the guest kernel as reference.

 Even using this config does not help to reproduce that.

 Do you use some special trinity params?

 Not really:

 ./trinity -m --quiet --dangerous -l off
 
 Oh, you run that as root??
 
 Can I add something to my kernel to provide more info when it happens?
 
 Maybe the attached patch can tell us more...
 

Nope, I see the warnings mentioned before, without the new 'HUH' warnings.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: mm: NULL ptr deref in anon_vma_interval_tree_verify

2012-11-02 Thread Sasha Levin

Ping?

On Thu, Oct 25, 2012 at 4:26 PM, Sasha Levin levinsasha...@gmail.com wrote:
 On 10/18/2012 06:46 PM, Sasha Levin wrote:
 Hi all,

 While fuzzing with trinity inside a KVM tools (lkvm) guest, on today's 
 linux-next kernel,
 I saw the following:

 [ 1857.278176] BUG: unable to handle kernel NULL pointer dereference at 
 0090
 [ 1857.283725] IP: [81229d0f] 
 anon_vma_interval_tree_verify+0xf/0xa0
 [ 1857.283725] PGD 6e19e067 PUD 6e19f067 PMD 0
 [ 1857.283725] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 [ 1857.283725] Dumping ftrace buffer:
 [ 1857.283725](ftrace buffer empty)
 [ 1857.283725] CPU 2
 [ 1857.283725] Pid: 15637, comm: trinity-child18 Tainted: GW
 3.7.0-rc1-next-20121018-sasha-2-g60a870d-dirty #61
 [ 1857.283725] RIP: 0010:[81229d0f]  [81229d0f] 
 anon_vma_interval_tree_verify+0xf/0xa0
 [ 1857.283725] RSP: 0018:88007df0fce8  EFLAGS: 00010296
 [ 1857.283725] RAX: 880089db1000 RBX: 880089db0ff0 RCX: 
 8800869e6928
 [ 1857.283725] RDX:  RSI: 880089db1008 RDI: 
 880089db0ff0
 [ 1857.283725] RBP: 88007df0fcf8 R08: 88006427d508 R09: 
 88012bb95f20
 [ 1857.283725] R10: 0001 R11: 8800c8525c60 R12: 
 88006e199370
 [ 1857.283725] R13: 88006e199300 R14:  R15: 
 880089db1000
 [ 1857.283725] FS:  7f322fd4c700() GS:88004d60() 
 knlGS:
 [ 1857.283725] CS:  0010 DS:  ES:  CR0: 80050033
 [ 1857.283725] CR2: 0090 CR3: 6e19d000 CR4: 
 000406e0
 [ 1857.283725] DR0:  DR1:  DR2: 
 
 [ 1857.283725] DR3:  DR6: 0ff0 DR7: 
 0400
 [ 1857.283725] Process trinity-child18 (pid: 15637, threadinfo 
 88007df0e000, task 88007ac8)
 [ 1857.283725] Stack:
 [ 1857.283725]  88007df0fd38 880089db0ff0 88007df0fd48 
 81233b58
 [ 1857.283725]  88007df0fd38 880089db1000 80d0 
 880089db1000
 [ 1857.283725]  88012bb95f20 885d97c8 885d97d8 
 880089db1000
 [ 1857.283725] Call Trace:
 [ 1857.283725]  [81233b58] validate_mm+0x58/0x1e0
 [ 1857.283725]  [81233da4] vma_link+0x94/0xe0
 [ 1857.283725]  [83a67fd4] ? _raw_spin_unlock_irqrestore+0x84/0xb0
 [ 1857.283725]  [81235f75] mmap_region+0x3f5/0x5c0
 [ 1857.283725]  [812363f7] do_mmap_pgoff+0x2b7/0x330
 [ 1857.283725]  [81220fd1] ? vm_mmap_pgoff+0x61/0xa0
 [ 1857.283725]  [81220fea] vm_mmap_pgoff+0x7a/0xa0
 [ 1857.283725]  [81234c72] sys_mmap_pgoff+0x182/0x1a0
 [ 1857.283725]  [8107dc40] ? syscall_trace_enter+0x20/0x2e0
 [ 1857.283725]  [810738dd] sys_mmap+0x1d/0x20
 [ 1857.283725]  [83a69ad8] tracesys+0xe1/0xe6
 [ 1857.283725] Code: 48 39 ce 77 9e f3 c3 0f 1f 44 00 00 31 c0 c3 66 66 66 
 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 53 48 89 fb
 48 83 ec 08 48 8b 17 48 8b 8a 90 00 00 00 48 39 4f 40 74 34 80 3d a6 82 5b 
 04 00 75
 [ 1857.283725] RIP  [81229d0f] 
 anon_vma_interval_tree_verify+0xf/0xa0
 [ 1857.283725]  RSP 88007df0fce8
 [ 1857.283725] CR2: 0090
 [ 1858.611277] ---[ end trace b51cc425e9b07fc0 ]---

 The obvious part is that anon_vma_interval_tree_verify() got called with 
 node == NULL, but when
 looking at the caller:

 list_for_each_entry(avc, vma-anon_vma_chain, same_vma)
 anon_vma_interval_tree_verify(avc);

 How it got called with said NULL becomes less obvious.

 I've hit a similar one with today's -next. It isn't exactly the same, but
 I suspect it's the same issue.

 [ 1523.657950] BUG: unable to handle kernel paging request at fff0
 [ 1523.660022] IP: [8122c29c] anon_vma_interval_tree_verify+0xc/0xa0
 [ 1523.660022] PGD 4e28067 PUD 4e29067 PMD 0
 [ 1523.675725] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 [ 1523.750066] CPU 0
 [ 1523.750066] Pid: 9050, comm: trinity-child64 Tainted: GW
 3.7.0-rc2-next-20121025-sasha-1-g673f98e-dirty #77
 [ 1523.750066] RIP: 0010:[8122c29c]  [8122c29c] 
 anon_vma_interval_tree_verify+0xc/0xa0
 [ 1523.750066] RSP: 0018:880045f81d48  EFLAGS: 00010296
 [ 1523.750066] RAX:  RBX: fff0 RCX: 
 
 [ 1523.750066] RDX:  RSI: 0001 RDI: 
 fff0
 [ 1523.750066] RBP: 880045f81d58 R08:  R09: 
 0f14
 [ 1523.750066] R10: 0f12 R11:  R12: 
 8800096c8d70
 [ 1523.750066] R13: 8800096c8d00 R14:  R15: 
 8800095b45e0
 [ 1523.750066] FS:  7f7a923f3700() GS:88001360() 
 knlGS:
 [ 1523.750066] CS:  0010 DS:  ES:  CR0: 80050033
 [ 1523.750066] CR2: fff0 CR3: 0969d000 CR4: 
 000406f0
 [ 1523.750066] DR0:

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-11-03 Thread Sasha Levin

On 11/03/2012 11:55 AM, Jiri Slaby wrote:
 On 11/03/2012 03:03 AM, Sasha Levin wrote:
 On 11/02/2012 12:18 PM, Jiri Slaby wrote:
 On 11/02/2012 05:07 PM, Sasha Levin wrote:
 On Fri, Nov 2, 2012 at 11:51 AM, Jiri Slaby jsl...@suse.cz wrote:
 On 10/31/2012 04:59 PM, Sasha Levin wrote:
 So you probably want a lot more than 100k syscalls, why limit it at
 all actually?

 I unset the limit but I still can't reproduce...

 I've attached my .config for the guest kernel as reference.

 Even using this config does not help to reproduce that.

 Do you use some special trinity params?

 Not really:

 ./trinity -m --quiet --dangerous -l off

 Oh, you run that as root??

 Can I add something to my kernel to provide more info when it happens?

 Maybe the attached patch can tell us more...


 Nope, I see the warnings mentioned before, without the new 'HUH' warnings.
 
 Actually it does. It is exactly as you wrote some time earlier. The work
 is scheduled after it was cancelled and should not trigger anymore. Or,
 it is scheduled before it is supposed to do. Could you try the attached
 patch and report what happens with that patch?
 
 PS I can't reproduce by whatever I tried.
 
 thanks,
 

Interesting...

[  388.783955] tty is bad=0 ops=  (null)Pid: 6480, comm: kworker/1:2 
Tainted: GW
3.7.0-rc3-next-20121102-sasha-2-gbb570e0-dirty #111


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-11-03 Thread Sasha Levin

On 11/03/2012 07:06 PM, Sasha Levin wrote:
 On 11/03/2012 11:55 AM, Jiri Slaby wrote:
 On 11/03/2012 03:03 AM, Sasha Levin wrote:
 On 11/02/2012 12:18 PM, Jiri Slaby wrote:
 On 11/02/2012 05:07 PM, Sasha Levin wrote:
 On Fri, Nov 2, 2012 at 11:51 AM, Jiri Slaby jsl...@suse.cz wrote:
 On 10/31/2012 04:59 PM, Sasha Levin wrote:
 So you probably want a lot more than 100k syscalls, why limit it at
 all actually?

 I unset the limit but I still can't reproduce...

 I've attached my .config for the guest kernel as reference.

 Even using this config does not help to reproduce that.

 Do you use some special trinity params?

 Not really:

 ./trinity -m --quiet --dangerous -l off

 Oh, you run that as root??

 Can I add something to my kernel to provide more info when it happens?

 Maybe the attached patch can tell us more...


 Nope, I see the warnings mentioned before, without the new 'HUH' warnings.

 Actually it does. It is exactly as you wrote some time earlier. The work
 is scheduled after it was cancelled and should not trigger anymore. Or,
 it is scheduled before it is supposed to do. Could you try the attached
 patch and report what happens with that patch?

 PS I can't reproduce by whatever I tried.

 thanks,

 
 Interesting...
 
 [  388.783955] tty is bad=0 ops=  (null)Pid: 6480, comm: kworker/1:2 
 Tainted: GW
 3.7.0-rc3-next-20121102-sasha-2-gbb570e0-dirty #111

So after fuzzing for a while I'm also seeing these:

[  603.533932] tty is bad=-2 ops=  (null)Pid: 37, comm: kworker/4:0 
Tainted: GW3.7.0-rc3-next-20121102-sasha-000
02-gbb570e0-dirty #112


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/8] drop if around WARN_ON

2012-11-04 Thread Sasha Levin

Hi Julia,

On Sat, Nov 3, 2012 at 4:30 PM, Julia Lawall julia.law...@lip6.fr wrote:
 These patches convert a conditional with a simple test expression and a
 then branch that only calls WARN_ON(1) to just a call to WARN_ON, which
 will test the condition.

 // smpl
 @@
 expression e;
 @@

 (
 if(+...e(...)...+) WARN_ON(1);
 |
 - if (e) WARN_ON(1);
 + WARN_ON(e);
 )// /smpl

So this deals with WARN_ON(), are you considering doing the same for
the rest of its friends?


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/8] drop if around WARN_ON

2012-11-04 Thread Sasha Levin

On Sun, Nov 4, 2012 at 10:57 AM, Julia Lawall julia.law...@lip6.fr wrote:
 On Sun, 4 Nov 2012, Sasha Levin wrote:

 Hi Julia,

 On Sat, Nov 3, 2012 at 4:30 PM, Julia Lawall julia.law...@lip6.fr wrote:

 These patches convert a conditional with a simple test expression and a
 then branch that only calls WARN_ON(1) to just a call to WARN_ON, which
 will test the condition.

 // smpl
 @@
 expression e;
 @@

 (
 if(+...e(...)...+) WARN_ON(1);
 |
 - if (e) WARN_ON(1);
 + WARN_ON(e);
 )// /smpl


 So this deals with WARN_ON(), are you considering doing the same for
 the rest of it's friends?


 I tried WARN_ON_ONCE, but the pattern never occurred.  Are there others that
 are worth trying?

Definitely!

Here's the semantic patch I've got:

@@
expression e;
@@

(
- if (e) WARN_ON(1);
+ WARN_ON(e);
|
- if (e) WARN_ON_ONCE(1);
+ WARN_ON_ONCE(e);
|
- if (e) WARN_ON_SMP(1);
+ WARN_ON_SMP(e);
|
- if (e) BUG();
+ BUG_ON(e);
)

This gave me a really huge patch output.

I can send it out if you think the patch above looks good.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/8] drop if around WARN_ON

2012-11-04 Thread Sasha Levin

On Sun, Nov 4, 2012 at 11:16 AM, Julia Lawall julia.law...@lip6.fr wrote:
 I didn't change any cases where the if test contains a function call.  The
 current definitions of WARN_ON seem to always evaluate the condition
 expression, but I was worried that that might not always be the case.  And
 calling a function (the ones I remember were some kinds of print functions)
 seems like something one might not want buried in the argument of a
 debugging macro.

Makes sense.

 WARN_ON_SMP is just WARN_ON if CONFIG_SMP is true, but it is just 0
 otherwise.  So in that case it seems important to check that one is not
 throwing away something important.

Yup, we just need to make sure that the expression being evaluated doesn't
have side-effects.

 I remember working on the BUG_ON case several years ago, and other people
 worked on it too, but I guess some are still there...  The current
 definitions of BUG_ON seem to keep the condition, but there are quite a few
 specialized definitions, so someone at some point might make a version that
 does not have that property.

It makes sense to keep an eye for such things when converting code. I
also don't think we'll get to see a version of BUG_ON which doesn't
evaluate the expression since the kernel already has more than enough
BUG_ONs that assume the expression will be evaluated:

BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, event));
BUG_ON(gpiochip_add(gemini_gpio_chip));
BUG_ON(clocksource_register_hz(sirfsoc_clocksource, CLOCK_TICK_RATE));
BUG_ON(gpio_request(ZOOM2_HEADSET_MUX_GPIO, hs_mux)  0);

And so on, so we're probably safe converting to BUG_ON even if the
condition is a function, as long as it doesn't create a long and
complicated BUG_ON() of course.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: tty, vt: lockdep warnings

2012-11-05 Thread Sasha Levin

Ping? Should I bisect it?

On Fri, Oct 26, 2012 at 9:37 AM, Alan Cox a...@lxorguk.ukuu.org.uk wrote:
 On Thu, 25 Oct 2012 15:37:43 -0400
 Sasha Levin sasha.le...@oracle.com wrote:

 Hi all,

 While fuzzing with trinity inside a KVM tools (lkvm) guest running latest 
 -next kernel,
 I've stumbled on the following spew:

 Looks real enough but its not a tty/vt layer spew. This is all coming out
 of the core framebuffer code which doesn't seem to be able to decide what
 the locking rules at the invocation of fb_notifier_call_chain are.

 It might need some console layer tweaking to provide 'register console
 and I already hold the locks' or similar but that notifier needs some
 kind of sanity applying as well.

 Cc'ing the fbdev folks

 Alan
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: tty, vt: lockdep warnings

2012-11-05 Thread Sasha Levin

On 11/05/2012 12:59 PM, Alan Cox wrote:
 On Mon, 5 Nov 2012 12:26:43 -0500
 Sasha Levin levinsasha...@gmail.com wrote:
 
 Ping? Should I bisect it?

 On Fri, Oct 26, 2012 at 9:37 AM, Alan Cox a...@lxorguk.ukuu.org.uk wrote:
 On Thu, 25 Oct 2012 15:37:43 -0400
 Sasha Levin sasha.le...@oracle.com wrote:

 Hi all,

 While fuzzing with trinity inside a KVM tools (lkvm) guest running latest 
 -next kernel,
 I've stumbled on the following spew:

 Looks real enough but its not a tty/vt layer spew. This is all coming out
 of the core framebuffer code which doesn't seem to be able to decide what
 the locking rules at the invocation of fb_notifier_call_chain are.

 It might need some console layer tweaking to provide 'register console
 and I already hold the locks' or similar but that notifier needs some
 kind of sanity applying as well.

 Cc'ing the fbdev folks
 
 I've cc'd the framebuffer folks. I can see why its occurring but I have
 no idea how they intend to fix it and I've not seen any replies.
 
 Sorry but I've got enough other things on my plate right now without
 trying to deal with the locking brain damage that the fbdev layer is.
 
 As far as I can tell the actual bug proper is years old.
 
 Alan
 

Ow, I figured it's something new since I've only now started seeing it in fuzz
tests, and it reproduces pretty much every time.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/6] VSOCK for Linux upstreaming

2012-11-05 Thread Sasha Levin

Hi George,

On Mon, Nov 5, 2012 at 1:00 PM, George Zhang georgezh...@vmware.com wrote:
 For additional information about the use of VMCI and in particular
 VMCI Sockets, please refer to the VMCI Socket Programming Guide
 available at https://www.vmware.com/support/developer/vmci-sdk/.

Is there documentation about the protocol itself? I couldn't find it
on the link above.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: mm: NULL ptr deref in anon_vma_interval_tree_verify

2012-11-06 Thread Sasha Levin

On 11/06/2012 10:54 PM, Michel Lespinasse wrote:
 On Tue, Nov 6, 2012 at 12:24 AM, Michel Lespinasse wal...@google.com wrote:
 On Mon, Nov 5, 2012 at 5:41 AM, Michel Lespinasse wal...@google.com wrote:
 On Sun, Nov 4, 2012 at 8:44 PM, Michel Lespinasse wal...@google.com wrote:
 On Sun, Nov 4, 2012 at 8:14 PM, Bob Liu lliu...@gmail.com wrote:
 Hmm, I attached a simple fix patch.

 Reviewed-by: Michel Lespinasse wal...@google.com
 (also ran some tests with it, but I could never reproduce the original
 issue anyway).

 Wait a minute, this is actually wrong. You need to call
 vma_lock_anon_vma() / vma_unlock_anon_vma() to avoid the issue with
 vma->anon_vma == NULL.

 I'll fix it and integrate it into my next patch series, which I intend
 to send later today. (I am adding new code into validate_mm(), so that
 it's easier to have it in the same patch series to avoid merge
 conflicts)

 Hmmm, now I'm getting confused about anon_vma locking again :/

 As Hugh privately remarked to me, the same_vma linked list is supposed
 to be protected by exclusive mmap_sem ownership, not by anon_vma lock.
 So now looking at it a bit more, I'm not sure what race we're
 preventing by taking the anon_vma lock in validate_mm() ???
 
 Looking at it a bit more:
 
 the same_vma linked list is *generally* protected by *exclusive*
 mmap_sem ownership. However, in expand_stack() we only have *shared*
 mmap_sem ownership, so that two concurrent expand_stack() calls
 (possibly on different vmas that have a different anon_vma lock) could
 race with each other. For this reason we do need the validate_mm()
 taking each vma's anon_vma lock (if any) before calling
 anon_vma_interval_tree_verify().
 
 While this justifies Bob's patch, this does not explain Sasha's
 reports - in both of them the backtrace did not involve
 expand_stack(), and there should be exclusive mmap_sem ownership, so
 I'm still unclear as to what could be causing Sasha's issue.
 
 Sasha, how reproducible is this?

This is pretty hard to reproduce, I've seen this only twice so far.

 
 Also, would the following change print something when the issue triggers ?

I'll run it with your patch, but as I've mentioned above - it's a PITA
to reproduce.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: tty, vt: lockdep warnings

2012-11-07 Thread Sasha Levin

On 11/06/2012 12:38 PM, Alan Cox wrote:
   The root
   cause is loading two different framebuffers with one taking over from
   another - that should be an obscure corner case and once the fuzz testing
   can avoid.
   
   I had a semi-informed poke at this and came up with a possible patch (not 
 very tested)

 If this fixes the real problems we've been seeing, I'll dance a jig.
 
 Youtube...

+1

 At this point my bigger concern is that it'll just make something else
 warn instead. The underlying problem is that fbcon layer implements a
 single threaded notifier whose locking semantics are at best random. It's
 not called with a specific set of locks each time. Possibly it should be
 two notifiers (one for fb stuff, one for console layer stuff) but the
 entire layer is horrible. I live in hope the KMS guys will rip out the
 useful bits and build a straight kms fb layer with refcounting and the
 like 8)
 
 Testing certainly needed and if it's still blowing up then hopefully
 further traces will help fix up the other cases we don't know about.

So the good news is that the original lockdep splat I've reported is gone.

The semi-bad news is that there's a new one. It happens less frequently
but I assume it's not a new splat either, but was well hidden behind the
other splat.

[ 1885.997312] ==
[ 1885.997312] [ INFO: possible circular locking dependency detected ]
[ 1885.997316] 3.7.0-rc4-next-20121106-sasha-8-g353b62f #117 Tainted: G 
   W
[ 1885.997316] ---
[ 1885.997319] trinity-child26/7820 is trying to acquire lock:
[ 1885.997330]  (fb_info-lock){+.+.+.}, at: [81a665d1] 
lock_fb_info+0x21/0x50
[ 1885.997331]
[ 1885.997331] but task is already holding lock:
[ 1885.997336]  (console_lock){+.+.+.}, at: [81a6c469] 
store_modes+0x59/0x100
[ 1885.997337]
[ 1885.997337] which lock already depends on the new lock.
[ 1885.997337]
[ 1885.997338]
[ 1885.997338] the existing dependency chain (in reverse order) is:
[ 1885.997341]
[ 1885.997341] - #1 (console_lock){+.+.+.}:
[ 1885.997347][8118536a] lock_acquire+0x1aa/0x240
[ 1885.997351][8110b618] console_lock+0x68/0x70
[ 1885.997354][81a689b2] register_framebuffer+0x242/0x2a0
[ 1885.997359][8390e4bc] vga16fb_probe+0x1c0/0x227
[ 1885.997364][81e6b472] platform_drv_probe+0x12/0x20
[ 1885.997369][81e69e95] driver_probe_device+0x155/0x340
[ 1885.997372][81e6a15e] __device_attach+0x2e/0x50
[ 1885.997375][81e682d6] bus_for_each_drv+0x56/0xa0
[ 1885.997379][81e69a88] device_attach+0x88/0xc0
[ 1885.997382][81e68516] bus_probe_device+0x36/0xd0
[ 1885.997385][81e6639f] device_add+0x4df/0x750
[ 1885.997388][81e6bda8] platform_device_add+0x1e8/0x280
[ 1885.997393][85b0f35a] vga16fb_init+0x8d/0xbb
[ 1885.997399][85acccb2] do_one_initcall+0x7a/0x135
[ 1885.997402][838db8d9] kernel_init+0x299/0x470
[ 1885.997406][83a98fbc] ret_from_fork+0x7c/0xb0
[ 1885.997409]
[ 1885.997409] - #0 (fb_info-lock){+.+.+.}:
[ 1885.997413][811825af] __lock_acquire+0x14df/0x1ca0
[ 1885.997416][8118536a] lock_acquire+0x1aa/0x240
[ 1885.997421][83a944d9] __mutex_lock_common+0x59/0x5a0
[ 1885.997425][83a94a5f] mutex_lock_nested+0x3f/0x50
[ 1885.997427][81a665d1] lock_fb_info+0x21/0x50
[ 1885.997430][81a68b95] fb_new_modelist+0xf5/0x140
[ 1885.997433][81a6c4ac] store_modes+0x9c/0x100
[ 1885.997436][81e65013] dev_attr_store+0x13/0x20
[ 1885.997440][812f820a] sysfs_write_file+0xfa/0x150
[ 1885.997444][8127a220] vfs_write+0xb0/0x180
[ 1885.997447][8127a3e0] sys_write+0x50/0xa0
[ 1885.997450][83a99298] tracesys+0xe1/0xe6
[ 1885.997451]
[ 1885.997451] other info that might help us debug this:
[ 1885.997451]
[ 1885.997452]  Possible unsafe locking scenario:
[ 1885.997452]
[ 1885.997453]CPU0CPU1
[ 1885.997454]
[ 1885.997456]   lock(console_lock);
[ 1885.997458]lock(fb_info-lock);
[ 1885.997460]lock(console_lock);
[ 1885.997462]   lock(fb_info-lock);
[ 1885.997463]
[ 1885.997463]  *** DEADLOCK ***
[ 1885.997463]
[ 1885.997464] 3 locks held by trinity-child26/7820:
[ 1885.997470]  #0:  (buffer-mutex){+.+.+.}, at: [812f8153] 
sysfs_write_file+0x43/0x150
[ 1885.997475]  #1:  (s_active#388){.+.+.+}, at: [812f81f2] 
sysfs_write_file+0xe2/0x150
[ 1885.997481]  #2:  (console_lock){+.+.+.}, at: [81a6c469] 
store_modes+0x59/0x100
[ 1885.997481]
[ 1885.997481] stack backtrace:
[ 1885.997484] Pid: 7820, comm:

Re: [PATCH v6 28/29] slub: slub-specific propagation changes.

2012-11-07 Thread Sasha Levin

On 11/01/2012 08:07 AM, Glauber Costa wrote:
 SLUB allows us to tune a particular cache behavior with sysfs-based
 tunables.  When creating a new memcg cache copy, we'd like to preserve
 any tunables the parent cache already had.
 
 This can be done by tapping into the store attribute function provided
 by the allocator. We of course don't need to mess with read-only
 fields. Since the attributes can have multiple types and are stored
 internally by sysfs, the best strategy is to issue a ->show() in the
 root cache, and then ->store() in the memcg cache.
 
 The drawback of that, is that sysfs can allocate up to a page in
 buffering for show(), that we are likely not to need, but also can't
 guarantee. To avoid always allocating a page for that, we can update the
 caches at store time with the maximum attribute size ever stored to the
 root cache. We will then get a buffer big enough to hold it. The
 corollary to this, is that if no stores happened, nothing will be
 propagated.
 
 It can also happen that a root cache has its tunables updated during
 normal system operation. In this case, we will propagate the change to
 all caches that are already active.
 
 Signed-off-by: Glauber Costa glom...@parallels.com
 CC: Christoph Lameter c...@linux.com
 CC: Pekka Enberg penb...@cs.helsinki.fi
 CC: Michal Hocko mho...@suse.cz
 CC: Kamezawa Hiroyuki kamezawa.hir...@jp.fujitsu.com
 CC: Johannes Weiner han...@cmpxchg.org
 CC: Suleiman Souhlal sulei...@google.com
 CC: Tejun Heo t...@kernel.org
 ---

Hi guys,

This patch is making lockdep angry! *bark bark*

[  351.935003] ==
[  351.937693] [ INFO: possible circular locking dependency detected ]
[  351.939720] 3.7.0-rc4-next-20121106-sasha-8-g353b62f #117 Tainted: G 
   W
[  351.942444] ---
[  351.943528] trinity-child13/6961 is trying to acquire lock:
[  351.943528]  (s_active#43){.+}, at: [812f9e11] 
sysfs_addrm_finish+0x31/0x60
[  351.943528]
[  351.943528] but task is already holding lock:
[  351.943528]  (slab_mutex){+.+.+.}, at: [81228a42] 
kmem_cache_destroy+0x22/0xe0
[  351.943528]
[  351.943528] which lock already depends on the new lock.
[  351.943528]
[  351.943528]
[  351.943528] the existing dependency chain (in reverse order) is:
[  351.943528]
- #1 (slab_mutex){+.+.+.}:
[  351.960334][8118536a] lock_acquire+0x1aa/0x240
[  351.960334][83a944d9] __mutex_lock_common+0x59/0x5a0
[  351.960334][83a94a5f] mutex_lock_nested+0x3f/0x50
[  351.960334][81256a6e] slab_attr_store+0xde/0x110
[  351.960334][812f820a] sysfs_write_file+0xfa/0x150
[  351.960334][8127a220] vfs_write+0xb0/0x180
[  351.960334][8127a540] sys_pwrite64+0x60/0xb0
[  351.960334][83a99298] tracesys+0xe1/0xe6
[  351.960334]
- #0 (s_active#43){.+}:
[  351.960334][811825af] __lock_acquire+0x14df/0x1ca0
[  351.960334][8118536a] lock_acquire+0x1aa/0x240
[  351.960334][812f9272] sysfs_deactivate+0x122/0x1a0
[  351.960334][812f9e11] sysfs_addrm_finish+0x31/0x60
[  351.960334][812fa369] sysfs_remove_dir+0x89/0xd0
[  351.960334][819e1d96] kobject_del+0x16/0x40
[  351.960334][8125ed40] __kmem_cache_shutdown+0x40/0x60
[  351.960334][81228a60] kmem_cache_destroy+0x40/0xe0
[  351.960334][82b21058] mon_text_release+0x78/0xe0
[  351.960334][8127b3b2] __fput+0x122/0x2d0
[  351.960334][8127b569] fput+0x9/0x10
[  351.960334][81131b4e] task_work_run+0xbe/0x100
[  351.960334][81110742] do_exit+0x432/0xbd0
[  351.960334][81110fa4] do_group_exit+0x84/0xd0
[  351.960334][8112431d] get_signal_to_deliver+0x81d/0x930
[  351.960334][8106d5aa] do_signal+0x3a/0x950
[  351.960334][8106df1e] do_notify_resume+0x3e/0x90
[  351.960334][83a993aa] int_signal+0x12/0x17
[  351.960334]
[  351.960334] other info that might help us debug this:
[  351.960334]
[  351.960334]  Possible unsafe locking scenario:
[  351.960334]
[  351.960334]CPU0CPU1
[  351.960334]
[  351.960334]   lock(slab_mutex);
[  351.960334]lock(s_active#43);
[  351.960334]lock(slab_mutex);
[  351.960334]   lock(s_active#43);
[  351.960334]
[  351.960334]  *** DEADLOCK ***
[  351.960334]
[  351.960334] 2 locks held by trinity-child13/6961:
[  351.960334]  #0:  (mon_lock){+.+.+.}, at: [82b21005] 
mon_text_release+0x25/0xe0
[  351.960334]  #1:  (slab_mutex){+.+.+.}, at: [81228a42] 
kmem_cache_destroy+0x22/0xe0
[  351.960334]
[  351.960334] stack backtrace:
[  351.960334] Pid: 6961, comm:

Re: [GIT] floppy

2012-11-07 Thread Sasha Levin

Hi guys!

This is me complaining that I still see the spews that this pull
request should fix, probably because this mail got lost.


Thanks,
Sasha

On Fri, Oct 26, 2012 at 3:15 PM, Jiri Kosina jkos...@suse.cz wrote:
 Jens,

 please pull from

   git://git.kernel.org/pub/scm/linux/kernel/git/jikos/linux-block.git for-jens

 into your for-3.7/drivers branch to receive revert of patch that Sasha
 Levin reported to cause NULL pointer dereferences, and it's not necessary.

 Thanks!

 Jiri Kosina (1):
   Revert genhd: Make put_disk() safe for disks that have not been 
 registered

  block/genhd.c  |6 +++---
  drivers/block/floppy.c |   13 +
  2 files changed, 16 insertions(+), 3 deletions(-)

 --
 Jiri Kosina
 SUSE Labs
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT] floppy

2012-11-07 Thread Sasha Levin

On Wed, Nov 7, 2012 at 3:48 PM, Jiri Kosina jkos...@suse.cz wrote:
 On Wed, 7 Nov 2012, Sasha Levin wrote:

 Hi guys!

 This is me complaining that I still see the spews that this pull
 request should fix, probably because this mail got lost.

 You are seeing that on -next, right?

Yup.

 That's because a different version of the patchset has been pushed to
 Linus, but for-next branch in Jens' tree still contains the old patchset
 containing the faulty patch (which is never going upstream).

 The fixed series is now in Linus' tree (as per -rc4), and I guess you are
 not having the problem there, right?

I haven't thought of trying just Linus's tree because I thought that
the original patch
that causes the breakage didn't make it there, so it would be obvious
that it would
work fine.

I'll give it a go I guess.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] vmxnet3: convert BUG_ON(true) into a simple BUG()

2012-11-08 Thread Sasha Levin

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 drivers/net/vmxnet3/vmxnet3_drv.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c 
b/drivers/net/vmxnet3/vmxnet3_drv.c
index 0ae1bcc..7e9622f 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1922,7 +1922,7 @@ vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
free_irq(adapter-pdev-irq, adapter-netdev);
break;
default:
-   BUG_ON(true);
+   BUG();
}
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ARM: gic: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/arm/common/gic.c |   18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c
index aa52699..f0b8a10 100644
--- a/arch/arm/common/gic.c
+++ b/arch/arm/common/gic.c
@@ -336,10 +336,8 @@ static struct irq_chip gic_chip = {
 
 void __init gic_cascade_irq(unsigned int gic_nr, unsigned int irq)
 {
-   if (gic_nr = MAX_GIC_NR)
-   BUG();
-   if (irq_set_handler_data(irq, gic_data[gic_nr]) != 0)
-   BUG();
+   BUG_ON(gic_nr = MAX_GIC_NR);
+   BUG_ON(irq_set_handler_data(irq, gic_data[gic_nr]) != 0);
irq_set_chained_handler(irq, gic_handle_cascade_irq);
 }
 
@@ -421,8 +419,7 @@ static void gic_dist_save(unsigned int gic_nr)
void __iomem *dist_base;
int i;
 
-   if (gic_nr = MAX_GIC_NR)
-   BUG();
+   BUG_ON(gic_nr = MAX_GIC_NR);
 
gic_irqs = gic_data[gic_nr].gic_irqs;
dist_base = gic_data_dist_base(gic_data[gic_nr]);
@@ -456,8 +453,7 @@ static void gic_dist_restore(unsigned int gic_nr)
unsigned int i;
void __iomem *dist_base;
 
-   if (gic_nr = MAX_GIC_NR)
-   BUG();
+   BUG_ON(gic_nr = MAX_GIC_NR);
 
gic_irqs = gic_data[gic_nr].gic_irqs;
dist_base = gic_data_dist_base(gic_data[gic_nr]);
@@ -493,8 +489,7 @@ static void gic_cpu_save(unsigned int gic_nr)
void __iomem *dist_base;
void __iomem *cpu_base;
 
-   if (gic_nr = MAX_GIC_NR)
-   BUG();
+   BUG_ON(gic_nr = MAX_GIC_NR);
 
dist_base = gic_data_dist_base(gic_data[gic_nr]);
cpu_base = gic_data_cpu_base(gic_data[gic_nr]);
@@ -519,8 +514,7 @@ static void gic_cpu_restore(unsigned int gic_nr)
void __iomem *dist_base;
void __iomem *cpu_base;
 
-   if (gic_nr = MAX_GIC_NR)
-   BUG();
+   BUG_ON(gic_nr = MAX_GIC_NR);
 
dist_base = gic_data_dist_base(gic_data[gic_nr]);
cpu_base = gic_data_cpu_base(gic_data[gic_nr]);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ARM: kprobes: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/arm/kernel/kprobes-test.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c
index 1862d8f..0fb370d 100644
--- a/arch/arm/kernel/kprobes-test.c
+++ b/arch/arm/kernel/kprobes-test.c
@@ -1212,8 +1212,7 @@ static int register_test_probe(struct test_probe *probe)
 {
int ret;
 
-   if (probe-registered)
-   BUG();
+   BUG_ON(probe-registered);
 
ret = register_kprobe(probe-kprobe);
if (ret = 0) {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ARM: versatile: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/arm/mach-versatile/pci.c |9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c
index 2f84f40..3936a11 100644
--- a/arch/arm/mach-versatile/pci.c
+++ b/arch/arm/mach-versatile/pci.c
@@ -82,12 +82,9 @@ static void __iomem *__pci_addr(struct pci_bus *bus,
/*
 * Trap out illegal values
 */
-   if (offset  255)
-   BUG();
-   if (busnr  255)
-   BUG();
-   if (devfn  255)
-   BUG();
+   BUG_ON(offset  255);
+   BUG_ON(busnr  255);
+   BUG_ON(devfn  255);
 
return VERSATILE_PCI_CFG_VIRT_BASE + ((busnr  16) |
(PCI_SLOT(devfn)  11) | (PCI_FUNC(devfn)  8) | offset);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ARM: integrator: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/arm/mach-integrator/pci_v3.c |9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-integrator/pci_v3.c 
b/arch/arm/mach-integrator/pci_v3.c
index bbeca59..85938de 100644
--- a/arch/arm/mach-integrator/pci_v3.c
+++ b/arch/arm/mach-integrator/pci_v3.c
@@ -191,12 +191,9 @@ static void __iomem *v3_open_config_window(struct pci_bus 
*bus,
/*
 * Trap out illegal values
 */
-   if (offset  255)
-   BUG();
-   if (busnr  255)
-   BUG();
-   if (devfn  255)
-   BUG();
+   BUG_ON(offset  255);
+   BUG_ON(busnr  255);
+   BUG_ON(devfn  255);
 
if (busnr == 0) {
int slot = PCI_SLOT(devfn);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ARM: dma: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/arm/mach-rpc/dma.c |3 +--
 arch/arm/mach-s3c64xx/dma.c |3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c
index 85883b2..92e22ba 100644
--- a/arch/arm/mach-rpc/dma.c
+++ b/arch/arm/mach-rpc/dma.c
@@ -265,8 +265,7 @@ static void floppy_enable_dma(unsigned int chan, dma_t *dma)
unsigned int fiqhandler_length;
struct pt_regs regs;
 
-   if (fdma-dma.sg)
-   BUG();
+   BUG_ON(fdma-dma.sg);
 
if (fdma-dma.dma_mode == DMA_MODE_READ) {
extern unsigned char floppy_fiqin_start, floppy_fiqin_end;
diff --git a/arch/arm/mach-s3c64xx/dma.c b/arch/arm/mach-s3c64xx/dma.c
index f2a7a17..585c2ae 100644
--- a/arch/arm/mach-s3c64xx/dma.c
+++ b/arch/arm/mach-s3c64xx/dma.c
@@ -603,8 +603,7 @@ static irqreturn_t s3c64xx_dma_irq(int irq, void *pw)
 buff-next != chan-next)
buff = buff-next;
 
-   if (!buff)
-   BUG();
+   BUG_ON(!buff);
 
if (buff == chan-next)
buff = chan-end;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ARM: OMAP1: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/arm/mach-omap1/board-fsample.c  |3 +--
 arch/arm/mach-omap1/board-h2.c   |3 +--
 arch/arm/mach-omap1/board-h3.c   |3 +--
 arch/arm/mach-omap1/board-perseus2.c |3 +--
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm/mach-omap1/board-fsample.c 
b/arch/arm/mach-omap1/board-fsample.c
index 8b5800a..7ca6cc4 100644
--- a/arch/arm/mach-omap1/board-fsample.c
+++ b/arch/arm/mach-omap1/board-fsample.c
@@ -307,8 +307,7 @@ static void __init omap_fsample_init(void)
 
fsample_init_smc91x();
 
-   if (gpio_request(FSAMPLE_NAND_RB_GPIO_PIN, NAND ready)  0)
-   BUG();
+   BUG_ON(gpio_request(FSAMPLE_NAND_RB_GPIO_PIN, NAND ready)  0);
gpio_direction_input(FSAMPLE_NAND_RB_GPIO_PIN);
 
omap_cfg_reg(L3_1610_FLASH_CS2B_OE);
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index 9134b64..4953cf7 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -412,8 +412,7 @@ static void __init h2_init(void)
 
h2_nand_resource.end = h2_nand_resource.start = OMAP_CS2B_PHYS;
h2_nand_resource.end += SZ_4K - 1;
-   if (gpio_request(H2_NAND_RB_GPIO_PIN, NAND ready)  0)
-   BUG();
+   BUG_ON(gpio_request(H2_NAND_RB_GPIO_PIN, NAND ready)  0);
gpio_direction_input(H2_NAND_RB_GPIO_PIN);
 
omap_cfg_reg(L3_1610_FLASH_CS2B_OE);
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index bf213d1..563ba16 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -406,8 +406,7 @@ static void __init h3_init(void)
 
nand_resource.end = nand_resource.start = OMAP_CS2B_PHYS;
nand_resource.end += SZ_4K - 1;
-   if (gpio_request(H3_NAND_RB_GPIO_PIN, NAND ready)  0)
-   BUG();
+   BUG_ON(gpio_request(H3_NAND_RB_GPIO_PIN, NAND ready)  0);
gpio_direction_input(H3_NAND_RB_GPIO_PIN);
 
/* GPIO10 Func_MUX_CTRL reg bit 29:27, Configure V2 to mode1 as GPIO */
diff --git a/arch/arm/mach-omap1/board-perseus2.c 
b/arch/arm/mach-omap1/board-perseus2.c
index 030bd48..67c2612 100644
--- a/arch/arm/mach-omap1/board-perseus2.c
+++ b/arch/arm/mach-omap1/board-perseus2.c
@@ -275,8 +275,7 @@ static void __init omap_perseus2_init(void)
 
perseus2_init_smc91x();
 
-   if (gpio_request(P2_NAND_RB_GPIO_PIN, NAND ready)  0)
-   BUG();
+   BUG_ON(gpio_request(P2_NAND_RB_GPIO_PIN, NAND ready)  0);
gpio_direction_input(P2_NAND_RB_GPIO_PIN);
 
omap_cfg_reg(L3_1610_FLASH_CS2B_OE);
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] alpha: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/alpha/kernel/pci_iommu.c |   12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 3f844d2..a21d0ab 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -354,8 +354,7 @@ static dma_addr_t alpha_pci_map_page(struct device *dev, 
struct page *page,
struct pci_dev *pdev = alpha_gendev_to_pci(dev);
int dac_allowed;
 
-   if (dir == PCI_DMA_NONE)
-   BUG();
+   BUG_ON(dir == PCI_DMA_NONE);
 
dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev-dma_mask) : 0; 
return pci_map_single_1(pdev, (char *)page_address(page) + offset, 
@@ -378,8 +377,7 @@ static void alpha_pci_unmap_page(struct device *dev, 
dma_addr_t dma_addr,
struct pci_iommu_arena *arena;
long dma_ofs, npages;
 
-   if (dir == PCI_DMA_NONE)
-   BUG();
+   BUG_ON(dir == PCI_DMA_NONE);
 
if (dma_addr = __direct_map_base
 dma_addr  __direct_map_base + __direct_map_size) {
@@ -662,8 +660,7 @@ static int alpha_pci_map_sg(struct device *dev, struct 
scatterlist *sg,
dma_addr_t max_dma;
int dac_allowed;
 
-   if (dir == PCI_DMA_NONE)
-   BUG();
+   BUG_ON(dir == PCI_DMA_NONE);
 
dac_allowed = dev ? pci_dac_dma_supported(pdev, pdev-dma_mask) : 0;
 
@@ -742,8 +739,7 @@ static void alpha_pci_unmap_sg(struct device *dev, struct 
scatterlist *sg,
dma_addr_t max_dma;
dma_addr_t fbeg, fend;
 
-   if (dir == PCI_DMA_NONE)
-   BUG();
+   BUG_ON(dir == PCI_DMA_NONE);
 
if (! alpha_mv.mv_pci_tbi)
return;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ARM: EXYNOS: use BUG_ON where possible

2012-11-08 Thread Sasha Levin

Just use BUG_ON() instead of constructions such as:

if (...)
BUG()

A simplified version of the semantic patch that makes this transformation
is as follows: (http://coccinelle.lip6.fr/)

// smpl
@@
expression e;
@@
- if (e) BUG();
+ BUG_ON(e);
// /smpl

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/arm/mach-exynos/common.c |6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c
index 4e577f6..6a55a5a 100644
--- a/arch/arm/mach-exynos/common.c
+++ b/arch/arm/mach-exynos/common.c
@@ -465,10 +465,8 @@ static void __init combiner_cascade_irq(unsigned int 
combiner_nr, unsigned int i
else
max_nr = EXYNOS4_MAX_COMBINER_NR;
 
-   if (combiner_nr = max_nr)
-   BUG();
-   if (irq_set_handler_data(irq, combiner_data[combiner_nr]) != 0)
-   BUG();
+   BUG_ON(combiner_nr = max_nr);
+   BUG_ON(irq_set_handler_data(irq, combiner_data[combiner_nr]) != 0);
irq_set_chained_handler(irq, combiner_handle_cascade_irq);
 }
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] kvm, async_pf: exit idleness when handling KVM_PV_REASON_PAGE_NOT_PRESENT

2012-10-19 Thread Sasha Levin

KVM_PV_REASON_PAGE_NOT_PRESENT kicks cpu out of idleness, but we haven't
marked that spot as an exit from idleness.

Not doing so can cause RCU warnings such as:

[  732.788386] ===
[  732.789803] [ INFO: suspicious RCU usage. ]
[  732.790032] 3.7.0-rc1-next-20121019-sasha-2-g6d8d02d-dirty #63 Tainted: 
GW
[  732.790032] ---
[  732.790032] include/linux/rcupdate.h:738 rcu_read_lock() used illegally 
while idle!
[  732.790032]
[  732.790032] other info that might help us debug this:
[  732.790032]
[  732.790032]
[  732.790032] RCU used illegally from idle CPU!
[  732.790032] rcu_scheduler_active = 1, debug_locks = 1
[  732.790032] RCU used illegally from extended quiescent state!
[  732.790032] 2 locks held by trinity-child31/8252:
[  732.790032]  #0:  (rq-lock){-.-.-.}, at: [83a67528] 
__schedule+0x178/0x8f0
[  732.790032]  #1:  (rcu_read_lock){.+.+..}, at: [81152bde] 
cpuacct_charge+0xe/0x200
[  732.790032]
[  732.790032] stack backtrace:
[  732.790032] Pid: 8252, comm: trinity-child31 Tainted: GW
3.7.0-rc1-next-20121019-sasha-2-g6d8d02d-dirty #63
[  732.790032] Call Trace:
[  732.790032]  [8118266b] lockdep_rcu_suspicious+0x10b/0x120
[  732.790032]  [81152c60] cpuacct_charge+0x90/0x200
[  732.790032]  [81152bde] ? cpuacct_charge+0xe/0x200
[  732.790032]  [81158093] update_curr+0x1a3/0x270
[  732.790032]  [81158a6a] dequeue_entity+0x2a/0x210
[  732.790032]  [81158ea5] dequeue_task_fair+0x45/0x130
[  732.790032]  [8114ae29] dequeue_task+0x89/0xa0
[  732.790032]  [8114bb9e] deactivate_task+0x1e/0x20
[  732.790032]  [83a67c29] __schedule+0x879/0x8f0
[  732.790032]  [8117e20d] ? trace_hardirqs_off+0xd/0x10
[  732.790032]  [810a37a5] ? kvm_async_pf_task_wait+0x1d5/0x2b0
[  732.790032]  [83a67cf5] schedule+0x55/0x60
[  732.790032]  [810a37c4] kvm_async_pf_task_wait+0x1f4/0x2b0
[  732.790032]  [81139e50] ? abort_exclusive_wait+0xb0/0xb0
[  732.790032]  [81139c25] ? prepare_to_wait+0x25/0x90
[  732.790032]  [810a3a66] do_async_page_fault+0x56/0xa0
[  732.790032]  [83a6a6e8] async_page_fault+0x28/0x30

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 arch/x86/kernel/kvm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b3e5e51..4180a87 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -247,7 +247,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long 
error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
+   rcu_irq_enter();
+   exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
+   rcu_irq_exit();
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
-- 
1.7.12.3

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH resend] net, bluetooth: don't attempt to free a channel that wasn't created

2012-10-08 Thread Sasha Levin

We may currently attempt to free a channel which wasn't created due to
an error in the initialization path; this would cause a NULL ptr deref.

This would cause the following oops:

[   12.919073] BUG: unable to handle kernel NULL pointer dereference at 
0010
[   12.919131] IP: [836645c4] l2cap_chan_put+0x34/0x50
[   12.919135] PGD 0
[   12.919138] Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[   12.919193] Dumping ftrace buffer:
[   12.919242](ftrace buffer empty)
[   12.919314] Modules linked in:
[   12.919318] CPU 1
[   12.919319] Pid: 6210, comm: krfcommd Tainted: GW
3.6.0-next-20121004-sasha-5-gb010653-dirty #30
[   12.919374] RIP: 0010:[836645c4]  [836645c4] 
l2cap_chan_put+0x34/0x50
[   12.919377] RSP: :880066933c38  EFLAGS: 00010246
[   12.919378] RAX: 8366c780 RBX:  RCX: 6667
[   12.919379] RDX: 0fa0 RSI: 84d3f79e RDI: 0010
[   12.919381] RBP: 880066933c48 R08: 859989f8 R09: 0001
[   12.919382] R10:  R11: 7fff R12: 
[   12.919383] R13: 88009b00a200 R14: 88009b00a200 R15: 0001
[   12.919385] FS:  () GS:88003360() 
knlGS:
[   12.919437] CS:  0010 DS:  ES:  CR0: 80050033
[   12.919440] CR2: 0010 CR3: 05026000 CR4: 000406e0
[   12.919446] DR0:  DR1:  DR2: 
[   12.919451] DR3:  DR6: 0ff0 DR7: 0400
[   12.919504] Process krfcommd (pid: 6210, threadinfo 880066932000, task 
880065c4b000)
[   12.919506] Stack:
[   12.919510]  88009b00a200 880032084000 880066933c68 
8366c7bc
[   12.919513]  7fff 880032084000 880066933c98 
833ae0ae
[   12.919516]  880066933ca8   
88009b00a200
[   12.919517] Call Trace:
[   12.919522]  [8366c7bc] l2cap_sock_destruct+0x3c/0x80
[   12.919527]  [833ae0ae] __sk_free+0x1e/0x1f0
[   12.919530]  [833ae2f7] sk_free+0x17/0x20
[   12.919585]  [8366ca4e] l2cap_sock_alloc.constprop.5+0x9e/0xd0
[   12.919591]  [8366cb9e] l2cap_sock_create+0x7e/0x100
[   12.919652]  [83a4f32a] ? _raw_read_lock+0x6a/0x80
[   12.919658]  [836402c4] ? bt_sock_create+0x74/0x110
[   12.919660]  [83640308] bt_sock_create+0xb8/0x110
[   12.919664]  [833aa232] __sock_create+0x282/0x3b0
[   12.919720]  [833aa0b0] ? __sock_create+0x100/0x3b0
[   12.919725]  [836785b0] ? rfcomm_process_sessions+0x17e0/0x17e0
[   12.919779]  [833aa37f] sock_create_kern+0x1f/0x30
[   12.919784]  [83675714] rfcomm_l2sock_create+0x44/0x70
[   12.919787]  [836785b0] ? rfcomm_process_sessions+0x17e0/0x17e0
[   12.919790]  [836785fe] rfcomm_run+0x4e/0x1f0
[   12.919846]  [836785b0] ? rfcomm_process_sessions+0x17e0/0x17e0
[   12.919852]  [81138ee3] kthread+0xe3/0xf0
[   12.919908]  [8117b12e] ? put_lock_stats.isra.14+0xe/0x40
[   12.919914]  [81138e00] ? flush_kthread_work+0x1f0/0x1f0
[   12.919968]  [83a5077c] ret_from_fork+0x7c/0x90
[   12.919973]  [81138e00] ? flush_kthread_work+0x1f0/0x1f0
[   12.920161] Code: 83 ec 08 f6 05 ff 58 44 02 04 74 1b 8b 4f 10 48 89 fa 48 
c7 c6 d9 d7 d4 84 48 c7 c7 80 9e aa 85 31 c0 e8 80
ac 3a fe 48 8d 7b 10 f0 83 6b 10 01 0f 94 c0 84 c0 74 05 e8 8b e0 ff ff 48 83 
c4 08
[   12.920165] RIP  [836645c4] l2cap_chan_put+0x34/0x50
[   12.920166]  RSP 880066933c38
[   12.920167] CR2: 0010
[   12.920417] ---[ end trace 5a9114e8a158ab84 ]---

Introduced in commit 61d6ef3e (Bluetooth: Make better use of l2cap_chan
reference counting).

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 net/bluetooth/l2cap_sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 083f2bf..66c295a 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1083,7 +1083,8 @@ static void l2cap_sock_destruct(struct sock *sk)
 {
BT_DBG(sk %p, sk);
 
-   l2cap_chan_put(l2cap_pi(sk)-chan);
+   if (l2cap_pi(sk)-chan)
+   l2cap_chan_put(l2cap_pi(sk)-chan);
if (l2cap_pi(sk)-rx_busy_skb) {
kfree_skb(l2cap_pi(sk)-rx_busy_skb);
l2cap_pi(sk)-rx_busy_skb = NULL;
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: blk: NULL ptr deref in blk_dequeue_request()

2012-10-09 Thread Sasha Levin

On 10/08/2012 05:45 PM, Jiri Kosina wrote:
 On Mon, 8 Oct 2012, Jan Kara wrote:
 
   I'm still seeing this on linux-next.
I think this is floppy related (see redo_fd_request() in the stack
  trace). And there were quite some changes to the area recently. Adding
  maintainer to CC.
 Hmm ... I don't immediately see how this is happening.
 
 Sasha, could you please do git bisect on drivers/block/floppy.c between 
 f6365201d and your git HEAD for starters (assuming that f6365201d works 
 well for you?).
 

A bisect on floppy.c yielded the following:

b33d002f4b6bae912463e5a66387c498aa69b6fe is the first bad commit
commit b33d002f4b6bae912463e5a66387c498aa69b6fe
Author: Ben Hutchings b...@decadent.org.uk
Date:   Mon Aug 27 20:56:53 2012 -0300

genhd: Make put_disk() safe for disks that have not been registered



Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: blk: NULL ptr deref in blk_dequeue_request()

2012-10-09 Thread Sasha Levin

On 10/09/2012 09:21 AM, Sasha Levin wrote:
 On 10/08/2012 05:45 PM, Jiri Kosina wrote:
 On Mon, 8 Oct 2012, Jan Kara wrote:

 I'm still seeing this on linux-next.
   I think this is floppy related (see redo_fd_request() in the stack
 trace). And there were quite some changes to the area recently. Adding
 maintainer to CC.
 Hmm ... I don't immediately see how this is happening.

 Sasha, could you please do git bisect on drivers/block/floppy.c between 
 f6365201d and your git HEAD for starters (assuming that f6365201d works 
 well for you?).

 
 A bisect on floppy.c yielded the following:
 
 b33d002f4b6bae912463e5a66387c498aa69b6fe is the first bad commit
 commit b33d002f4b6bae912463e5a66387c498aa69b6fe
 Author: Ben Hutchings b...@decadent.org.uk
 Date:   Mon Aug 27 20:56:53 2012 -0300
 
 genhd: Make put_disk() safe for disks that have not been registered

2 more things:

 1. The guest vm which I'm testing on doesn't emulate anything which even looks 
like a floppy.
 2. I'm seeing the following lines before the BUG:

[9.836604] floppy0: no floppy controllers found
[9.837246] work still pending
[9.837743] floppy0: floppy_shutdown: timeout handler died.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] xattr: prevent NULL ptr deref warnings in __simple_xattr_set

2012-10-09 Thread Sasha Levin

On 09/14/2012 04:58 PM, Aristeu Rozanski wrote:
 On Fri, Sep 14, 2012 at 01:55:55PM -0700, Tejun Heo wrote:
 On Fri, Sep 14, 2012 at 01:54:34PM -0700, Tejun Heo wrote:
 On Fri, Sep 14, 2012 at 09:35:54PM +0200, Sasha Levin wrote:
 Prevent warnings generated by smatch due to unchecked dereference of
 'new_xattr' in __simple_xattr_set().

 Isn't this an actual bug w/ or w/o smatch?  Remove request (NULL
 @value) w/o XATTR_REPLACE for an non-existent node would end up
 calling list_add() on NULL, right?  If so, please collapse these two
 patches and mention the actual bug instead of smatch warning.

 And can somebody please make that function less confusing? -
 restructuring / commenting whatever.  It's doing something simple.
 It's not supposed to be this confusing.
 
 I'll work on that.
 

As it's still happening in linux-next, should I send a simple patch to fix it
along with Tejun's comments? Or is the rewrite of __simple_xattr_set() around
the corner?


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 06/10] mm: kill vma flag VM_CAN_NONLINEAR

2012-10-09 Thread Sasha Levin

On 07/31/2012 06:42 AM, Konstantin Khlebnikov wrote:
 This patch moves actual ptes filling for non-linear file mappings
 into special vma operation: ->remap_pages().
 
 File system must implement this method to get non-linear mappings support,
 if it uses filemap_fault() then generic_file_remap_pages() can be used.
 
 Now device drivers can implement this method and obtain nonlinear vma support.
 
 Signed-off-by: Konstantin Khlebnikov khlebni...@openvz.org
 Cc: Alexander Viro v...@zeniv.linux.org.uk
 Cc: Nick Piggin npig...@kernel.dk
 Cc: Ingo Molnar mi...@redhat.com

I was fuzzing with trinity inside a KVM tools guest, and hit the following NULL 
deref:

[ 1202.209854] BUG: unable to handle kernel NULL pointer dereference at 
0040
[ 1202.215344] IP: [812290cf] sys_remap_file_pages+0xcf/0x380
[ 1202.215904] PGD 24ccc067 PUD 2f693067 PMD 0
[ 1202.215904] Oops:  [#2] PREEMPT SMP DEBUG_PAGEALLOC
[ 1202.215904] CPU 3
[ 1202.224995] Pid: 17953, comm: trinity-child3 Tainted: G  D W
3.6.0-next-20121009-sasha-1-ge404bae #43
[ 1202.224995] RIP: 0010:[812290cf]  [812290cf] 
sys_remap_file_pages+0xcf/0x380
[ 1202.224995] RSP: 0018:880025819f18  EFLAGS: 00010246
[ 1202.224995] RAX: 050444f9 RBX: 8010 RCX: 0001
[ 1202.224995] RDX:  RSI: 8010 RDI: 8800255f1000
[ 1202.279533] RBP: 880025819f78 R08: 88000c9ea580 R09: 
[ 1202.279533] R10: 0001 R11:  R12: 8800255f10a8
[ 1202.279533] R13:  R14: 8800255f1000 R15: 8070
[ 1202.279533] FS:  7fa063d0e700() GS:88006760() 
knlGS:
[ 1202.279533] CS:  0010 DS:  ES:  CR0: 80050033
[ 1202.279533] CR2: 0040 CR3: 2cc81000 CR4: 000406e0
[ 1202.279533] DR0:  DR1:  DR2: 
[ 1202.279533] DR3:  DR6: 0ff0 DR7: 0400
[ 1202.279533] Process trinity-child3 (pid: 17953, threadinfo 880025818000, 
task 88003061b000)
[ 1202.279533] Stack:
[ 1202.279533]  880025819f48 8107dc10 8010 

[ 1202.279533]  0060 0aefbf86 00d8 
8010
[ 1202.279533]  0003 00d8 0060 
00d8
[ 1202.279533] Call Trace:
[ 1202.279533]  [8107dc10] ? syscall_trace_enter+0x20/0x2e0
[ 1202.279533]  [83a64738] tracesys+0xe1/0xe6
[ 1202.279533] Code: 02 00 00 48 8b 40 30 a8 08 0f 84 6d 02 00 00 49 83 b8 a0 
00 00 00 00 74 0b a9 00 00 80 00 0f 84 58 02 00 00
49 8b 90 88 00 00 00 48 83 7a 40 00 0f 84 46 02 00 00 49 8b 50 08 48 39 d3 0f 
82 39
[ 1202.279533] RIP  [812290cf] sys_remap_file_pages+0xcf/0x380
[ 1202.279533]  RSP 880025819f18
[ 1202.279533] CR2: 0040
[ 1202.401144] ---[ end trace fe8a5604834bab83 ]---

It would seem that this patch adds the following check into 
sys_remap_file_pages():

	if (!vma->vm_ops->remap_pages)
		goto out;

But vma->vm_ops itself is NULL.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] of/mdio: fix build errors when CONFIG_OF isn't set

2012-10-10 Thread Sasha Levin

Commit f9dc9ac5 (of/mdio: Add dummy functions in of_mdio.h.) added
empty stubs to of_mdio.h which were not static; this causes build errors
when these symbols are defined across several objects.

Fix it by marking those stubs as 'static inline'.

Signed-off-by: Sasha Levin sasha.le...@oracle.com
---
 include/linux/of_mdio.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 6ef49b8..7f17b9c 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -26,17 +26,17 @@ extern struct phy_device *of_phy_connect_fixed_link(struct 
net_device *dev,
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 
 #else /* CONFIG_OF */
-int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
+static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node 
*np)
 {
return -ENOSYS;
 }
 
-struct phy_device *of_phy_find_device(struct device_node *phy_np)
+static inline struct phy_device *of_phy_find_device(struct device_node *phy_np)
 {
return NULL;
 }
 
-struct phy_device *of_phy_connect(struct net_device *dev,
+static inline struct phy_device *of_phy_connect(struct net_device *dev,
 struct device_node *phy_np,
 void (*hndlr)(struct net_device *),
 u32 flags, phy_interface_t iface)
@@ -44,14 +44,14 @@ struct phy_device *of_phy_connect(struct net_device *dev,
return NULL;
 }
 
-struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
+static inline struct phy_device *of_phy_connect_fixed_link(struct net_device 
*dev,
 void (*hndlr)(struct net_device *),
 phy_interface_t iface)
 {
return NULL;
 }
 
-struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
+static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 {
return NULL;
 }
-- 
1.7.12

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Patch 3/7] smpboot: Provide infrastructure for percpu hotplug threads

2012-10-11 Thread Sasha Levin

On Wed, Sep 19, 2012 at 5:47 PM, Sasha Levin levinsasha...@gmail.com wrote:
 Hi Thomas,

 On 07/16/2012 12:42 PM, Thomas Gleixner wrote:
 Provide a generic interface for setting up and tearing down percpu
 threads.

 On registration the threads for already online cpus are created and
 started. On deregistration (modules) the threads are stopped.

 During hotplug operations the threads are created, started, parked and
 unparked. The datastructure for registration provides a pointer to
 percpu storage space and optional setup, cleanup, park, unpark
 functions. These functions are called when the thread state changes.

 Each implementation has to provide a function which is queried and
 returns whether the thread should run and the thread function itself.

 The core code handles all state transitions and avoids duplicated code
 in the call sites.

 Signed-off-by: Thomas Gleixner t...@linutronix.de
 ---

 This patch seems to cause the following BUG() on KVM guests with a large
 number of VCPUs:

 [0.511760] [ cut here ]
 [0.511761] kernel BUG at kernel/smpboot.c:134!
 [0.511764] invalid opcode:  [#3] PREEMPT SMP DEBUG_PAGEALLOC
 [0.511779] CPU 0
 [0.511780] Pid: 70, comm: watchdog/10 Tainted: G  D W
 3.6.0-rc6-next-20120919-sasha-1-gb54aafe #365
 [0.511783] RIP: 0010:[81141676]  [81141676]
 smpboot_thread_fn+0x196/0x2e0
 [0.511785] RSP: 0018:88000cf4bdd0  EFLAGS: 00010206
 [0.511786] RAX:  RBX: 88000cf58000 RCX: 
 
 [0.511787] RDX:  RSI: 0001 RDI: 
 0001
 [0.511788] RBP: 88000cf4be30 R08:  R09: 
 0001
 [0.511789] R10:  R11:  R12: 
 88000cdb9ff0
 [0.511790] R13: 84c60920 R14: 000a R15: 
 88000cf58000
 [0.511792] FS:  () GS:88000d20()
 knlGS:
 [0.511794] CS:  0010 DS:  ES:  CR0: 8005003b
 [0.511795] CR2:  CR3: 04c26000 CR4: 
 000406f0
 [0.511801] DR0:  DR1:  DR2: 
 
 [0.511805] DR3:  DR6: 0ff0 DR7: 
 0400
 [0.511807] Process watchdog/10 (pid: 70, threadinfo 88000cf4a000, task
 88000cf58000)
 [0.511808] Stack:
 [0.511822]  88000cf4bfd8 88000cf4bfd8  
 
 [0.511833]  88000cf4be00 839eace5 88000cf4be30 
 88000cdd1c68
 [0.511844]  88000cdb9ff0 811414e0  
 
 [0.511845] Call Trace:
 [0.511852]  [839eace5] ? schedule+0x55/0x60
 [0.511857]  [811414e0] ? __smpboot_create_thread+0xf0/0xf0
 [0.511863]  [81135c13] kthread+0xe3/0xf0
 [0.511867]  [839eb463] ? wait_for_common+0x143/0x180
 [0.511873]  [839ef044] kernel_thread_helper+0x4/0x10
 [0.511878]  [839ed3b4] ? retint_restore_args+0x13/0x13
 [0.511883]  [81135b30] ? insert_kthread_work+0x90/0x90
 [0.511888]  [839ef040] ? gs_change+0x13/0x13
 [0.511916] Code: 24 04 02 00 00 00 0f 1f 80 00 00 00 00 e8 b3 46 ff ff e9 
 b6
 fe ff ff 66 0f 1f 44 00 00 45 8b 34 24 e8 ff 72 8a 00 41 39 c6 74 0a 0f 0b 
 0f
 1f 84 00 00 00 00 00 41 8b 44 24 04 85 c0 74 0f 83 f8
 [0.511919] RIP  [81141676] smpboot_thread_fn+0x196/0x2e0
 [0.511920]  RSP 88000cf4bdd0
 [0.511922] ---[ end trace 127920ef70923ae1 ]---

 I'm starting the guest with numa=fake=10, so vcpu 0 ends up on the same (fake)
 node as vcpu 10, and while digging into the bug, it seems that the issue is 
 that
 vcpu10's thread gets scheduled on vcpu0.

 Beyond that I don't really understand what's wrong...

Ping? Still seeing that with linux-next.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: blk: NULL ptr deref in blk_dequeue_request()

2012-10-12 Thread Sasha Levin

Hi Ben,

On Wed, Oct 10, 2012 at 11:52 AM, Ben Hutchings b...@decadent.org.uk wrote:
 On Tue, 2012-10-09 at 09:26 -0400, Sasha Levin wrote:
 On 10/09/2012 09:21 AM, Sasha Levin wrote:
  On 10/08/2012 05:45 PM, Jiri Kosina wrote:
  On Mon, 8 Oct 2012, Jan Kara wrote:
 
  I'm still seeing this on linux-next.
I think this is floppy related (see redo_fd_request() in the stack
  trace). And there were quite some changes to the area recently. Adding
  maintainer to CC.
  Hmm ... I don't immediately see how this is happening.
 
  Sasha, could you please do git bisect on drivers/block/floppy.c between
  f6365201d and your git HEAD for starters (assuming that f6365201d works
  well for you?).
 
 
  A bisect on floppy.c yielded the following:
 
  b33d002f4b6bae912463e5a66387c498aa69b6fe is the first bad commit
  commit b33d002f4b6bae912463e5a66387c498aa69b6fe
  Author: Ben Hutchings b...@decadent.org.uk
  Date:   Mon Aug 27 20:56:53 2012 -0300
 
  genhd: Make put_disk() safe for disks that have not been registered

 2 more things:

  1. The guest vm which I'm testing on doesn't emulate anything which even 
 looks like a floppy.
  2. I'm seeing the following lines before the BUG:

 [9.836604] floppy0: no floppy controllers found
 [9.837246] work still pending
 [9.837743] floppy0: floppy_shutdown: timeout handler died.

 I see two problems:

 1. redo_fd_request() races with tear-down of the disks, but because
 set_next_request() checks disk->queue before doing anything this was
 usually harmless.  Now that do_floppy_init() doesn't clear disk->queue,
 the race condition is much easier to hit.  This may fix that problem in
 do_floppy_init(), though there appear to be worse bugs in tear-down
 order in floppy_module_exit():

 --- a/drivers/block/floppy.c
 +++ b/drivers/block/floppy.c
 @@ -4320,13 +4320,13 @@ out_unreg_region:
  out_unreg_blkdev:
 unregister_blkdev(FLOPPY_MAJOR, fd);
  out_put_disk:
 +   destroy_workqueue(floppy_wq);
 while (dr--) {
 del_timer_sync(motor_off_timer[dr]);
 if (disks[dr]-queue)
 blk_cleanup_queue(disks[dr]-queue);
 put_disk(disks[dr]);
 }
 -   destroy_workqueue(floppy_wq);
 return err;
  }

 --- END ---

 2. I made a big mistake in using the existing GENHD_FL_UP flag, as it is
 cleared by del_gendisk().  Incremental patch below, but it should be
 squashed into the previous patch if that branch is still rebase-able.

 Ben.

 ---
 From: Ben Hutchings b...@decadent.org.uk
 Date: Wed, 10 Oct 2012 16:17:01 +0100
 Subject: [PATCH] genhd: Make put_disk() safe again for disks that *have* been
  registered

 Commit b33d002 ('genhd: Make put_disk() safe for disks that have not
 been registered') wrongly used the GENHD_FL_UP flag to test whether a
 disk held a reference to its queue.  Since this is cleared by
 del_gendisk(), queues will not be properly cleaned up if a disk has
 been registered and then torn down in the normal way.  Introduce a
 new flag for this purpose.

 Signed-off-by: Ben Hutchings b...@decadent.org.uk
 ---
  block/genhd.c |7 +--
  include/linux/genhd.h |1 +
  2 files changed, 6 insertions(+), 2 deletions(-)

 diff --git a/block/genhd.c b/block/genhd.c
 index 633751d..b5f482f 100644
 --- a/block/genhd.c
 +++ b/block/genhd.c
 @@ -617,7 +617,10 @@ void add_disk(struct gendisk *disk)
  * Take an extra ref on queue which will be put on disk_release()
  * so that it sticks around as long as @disk is there.
  */
 -   WARN_ON_ONCE(!blk_get_queue(disk-queue));
 +   if (blk_get_queue(disk-queue))
 +   disk-flags |= GENHD_FL_GOT_QUEUE;
 +   else
 +   WARN_ON(1);

 retval = sysfs_create_link(disk_to_dev(disk)-kobj, bdi-dev-kobj,
bdi);
 @@ -1105,7 +1108,7 @@ static void disk_release(struct device *dev)
 disk_replace_part_tbl(disk, NULL);
 free_part_stats(disk-part0);
 free_part_info(disk-part0);
 -   if (disk-queue  disk-flags  GENHD_FL_UP)
 +   if (disk-queue  disk-flags  GENHD_FL_GOT_QUEUE)
 blk_put_queue(disk-queue);
 kfree(disk);
  }
 diff --git a/include/linux/genhd.h b/include/linux/genhd.h
 index 4f440b3..7c2560c 100644
 --- a/include/linux/genhd.h
 +++ b/include/linux/genhd.h
 @@ -134,6 +134,7 @@ struct hd_struct {
  #define GENHD_FL_NATIVE_CAPACITY   128
  #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE256
  #define GENHD_FL_NO_PART_SCAN  512
 +#define GENHD_FL_GOT_QUEUE 1024

  enum {
 DISK_EVENT_MEDIA_CHANGE = 1  0, /* media changed */

I'm now seeing these instead:

[   34.823972] general protection fault:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
[   34.830888] Dumping ftrace buffer:
[   34.830888](ftrace buffer empty)
[   34.830888] CPU 5
[   34.830888] Pid: 6, comm: kworker/u:0 Tainted: GW
3.6.0

Re: [patch for-3.7] mm, mempolicy: fix printing stack contents in numa_maps

2012-10-24 Thread Sasha Levin

On Wed, Oct 17, 2012 at 1:24 AM, David Rientjes rient...@google.com wrote:
 On Wed, 17 Oct 2012, Dave Jones wrote:

 BUG: sleeping function called from invalid context at kernel/mutex.c:269
 in_atomic(): 1, irqs_disabled(): 0, pid: 8558, name: trinity-child2
 3 locks on stack by trinity-child2/8558:
  #0: held: (p-lock){+.+.+.}, instance: 88010c9a00b0, at: 
 [8120cd1f] seq_lseek+0x3f/0x120
  #1: held: (mm-mmap_sem){++}, instance: 88013956f7c8, at: 
 [81254437] m_start+0xa7/0x190
  #2: held: ((p-alloc_lock)-rlock){+.+...}, instance: 
 88011fc64f30, at: [81254f8f] show_numa_map+0x14f/0x610
 Pid: 8558, comm: trinity-child2 Not tainted 3.7.0-rc1+ #32
 Call Trace:
  [810ae4ec] __might_sleep+0x14c/0x200
  [816bdf4e] mutex_lock_nested+0x2e/0x50
  [811c43a3] mpol_shared_policy_lookup+0x33/0x90
  [8118d5c3] shmem_get_policy+0x33/0x40
  [811c31fa] get_vma_policy+0x3a/0x90
  [81254fa3] show_numa_map+0x163/0x610
  [81255b10] ? pid_maps_open+0x20/0x20
  [81255980] ? pagemap_hugetlb_range+0xf0/0xf0
  [81255483] show_pid_numa_map+0x13/0x20
  [8120c902] traverse+0xf2/0x230
  [8120cd8b] seq_lseek+0xab/0x120
  [811e6c0b] sys_lseek+0x7b/0xb0
  [816ca088] tracesys+0xe1/0xe6


 Hmm, looks like we need to change the refcount semantics entirely.  We'll
 need to make get_vma_policy() always take a reference and then drop it
 accordingly.  This works if get_vma_policy() can grab a reference while
 holding task_lock() for the task policy fallback case.

 Comments on this approach?
 ---
[snip]

I'm not sure about the status of the patch, but it doesn't apply on
top of -next, and I still
see the warnings when fuzzing on -next.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch for-3.7] mm, mempolicy: fix printing stack contents in numa_maps

2012-10-24 Thread Sasha Levin

On Wed, Oct 24, 2012 at 7:34 PM, David Rientjes rient...@google.com wrote:
 On Wed, 24 Oct 2012, Sasha Levin wrote:

 I'm not sure about the status of the patch, but it doesn't apply on
 top of -next, and I still
 see the warnings when fuzzing on -next.


 This should be fixed by 9e7814404b77 (hold task-mempolicy while
 numa_maps scans.) in 3.7-rc2, can you reproduce any issues reading
 /proc/pid/numa_maps on that kernel?

I was actually referring to the warnings Dave Jones saw when fuzzing
with trinity after the
original patch was applied.

I still see the following when fuzzing:

[  338.467156] BUG: sleeping function called from invalid context at
kernel/mutex.c:269
[  338.473719] in_atomic(): 1, irqs_disabled(): 0, pid: 6361, name: trinity-main
[  338.481199] 2 locks held by trinity-main/6361:
[  338.486629]  #0:  (mm-mmap_sem){++}, at: [810aa314]
__do_page_fault+0x1e4/0x4f0
[  338.498783]  #1:  ((mm-page_table_lock)-rlock){+.+...}, at:
[8122f017] handle_pte_fault+0x3f7/0x6a0
[  338.511409] Pid: 6361, comm: trinity-main Tainted: GW
3.7.0-rc2-next-20121024-sasha-1-gd95ef01-dirty #74
[  338.530318] Call Trace:
[  338.534088]  [8114e393] __might_sleep+0x1c3/0x1e0
[  338.539358]  [83ae5209] mutex_lock_nested+0x29/0x50
[  338.545253]  [8124fc3e] mpol_shared_policy_lookup+0x2e/0x90
[  338.545258]  [81219ebe] shmem_get_policy+0x2e/0x30
[  338.545264]  [8124e99a] get_vma_policy+0x5a/0xa0
[  338.545267]  [8124fce1] mpol_misplaced+0x41/0x1d0
[  338.545272]  [8122f085] handle_pte_fault+0x465/0x6a0
[  338.545278]  [81131e04] ? __rcu_read_unlock+0x44/0xb0
[  338.545282]  [81230baa] handle_mm_fault+0x32a/0x360
[  338.545286]  [810aa5b0] __do_page_fault+0x480/0x4f0
[  338.545293]  [8111a706] ? del_timer+0x26/0x80
[  338.545298]  [811c7313] ? rcu_cleanup_after_idle+0x23/0x170
[  338.545302]  [811ca9a4] ? rcu_eqs_exit_common+0x64/0x3a0
[  338.545305]  [811c8c66] ? rcu_eqs_enter_common+0x7c6/0x970
[  338.545309]  [811cafdc] ? rcu_eqs_exit+0x9c/0xb0
[  338.545312]  [810aa666] do_page_fault+0x26/0x40
[  338.545317]  [810a3a40] do_async_page_fault+0x30/0xa0
[  338.545321]  [83ae9268] async_page_fault+0x28/0x30
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch for-3.7] mm, mempolicy: fix printing stack contents in numa_maps

2012-10-25 Thread Sasha Levin

On 10/25/2012 10:39 AM, Peter Zijlstra wrote:
 On Thu, 2012-10-25 at 14:19 +0200, Peter Zijlstra wrote:
 On Wed, 2012-10-24 at 17:08 -0700, David Rientjes wrote:
 Ok, this looks the same but it's actually a different issue: 
 mpol_misplaced(), which now only exists in linux-next and not in 3.7-rc2, 
 calls get_vma_policy() which may take the shared policy mutex.  This 
 happens while holding page_table_lock from do_huge_pmd_numa_page() but 
 also from do_numa_page() while holding a spinlock on the ptl, which is 
 coming from the sched/numa branch.

 Is there any way that we can avoid changing the shared policy mutex back 
 into a spinlock (it was converted in b22d127a39dd [mempolicy: fix a race 
 in shared_policy_replace()])?

 Adding Peter, Rik, and Mel to the cc. 

 Urgh, crud I totally missed that.

 So the problem is that we need to compute if the current page is placed
 'right' while holding pte_lock in order to avoid multiple pte_lock
 acquisitions on the 'fast' path.

 I'll look into this in a bit, but one thing that comes to mind is having
 both a spinlock and a mutex and require holding both for modification
 while either one is sufficient for read.

 That would allow sp_lookup() to use the spinlock, while insert and
 replace can hold both.

 Not sure it will work for this, need to stare at this code a little
 more.
 
 So I think the below should work, we hold the spinlock over both rb-tree
 modification as sp free, this makes mpol_shared_policy_lookup() which
 returns the policy with an incremented refcount work with just the
 spinlock.
 
 Comments?
 
 ---

It made the warnings I've reported go away.


Thanks,
Sasha

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 21/21] TTY: move tty buffers to tty_port

2012-10-25 Thread Sasha Levin

Hi guys,

On 10/18/2012 04:26 PM, Jiri Slaby wrote:
 So this is it. The big step why we did all the work over the past
 kernel releases. Now everything is prepared, so nothing protects us
 from doing that big step.
 
|  |\  \ /^l  |  |
|  | \  / /   |  |
|  '-,.__   =\/   ,-`=  |  '-,.__
| O __.´´)(  .`   | O __.´´)
 ~~~   ~~  ``  ~~~   ~~
 The buffers are now in the tty_port structure and we can start
 teaching the buffer helpers (insert char/string, flip etc.) to use
 tty_port instead of tty_struct all around.
 
 Signed-off-by: Jiri Slaby jsl...@suse.cz
 ---

Fuzzing with trinity inside a KVM tools (lkvm) guest with -next kernel
uncovered the following warning:

[ 1339.448706] [ cut here ]
[ 1339.451224] WARNING: at drivers/tty/tty_buffer.c:476 
flush_to_ldisc+0x60/0x200()
[ 1339.454272] tty is NULLPid: 7147, comm: kworker/4:0 Tainted: GW
3.7.0-rc2-next-20121025-sasha-1-g673f98e-dirty #75
[ 1339.458693] Call Trace:
[ 1339.459410]  [81bb1ea0] ? flush_to_ldisc+0x60/0x200
[ 1339.461289]  [81109b86] warn_slowpath_common+0x86/0xb0
[ 1339.462992]  [81109c11] warn_slowpath_fmt+0x41/0x50
[ 1339.464772]  [81bb1ea0] flush_to_ldisc+0x60/0x200
[ 1339.467076]  [8112d5a9] process_one_work+0x3b9/0x770
[ 1339.469501]  [8112d458] ? process_one_work+0x268/0x770
[ 1339.472053]  [8112dcc1] ? worker_thread+0x51/0x3f0
[ 1339.473831]  [81bb1e40] ? __tty_buffer_request_room+0x180/0x180
[ 1339.475834]  [8112df2a] worker_thread+0x2ba/0x3f0
[ 1339.478027]  [8112dc70] ? rescuer_thread+0x2d0/0x2d0
[ 1339.480431]  [81138c33] kthread+0xe3/0xf0
[ 1339.482383]  [8117d7be] ? put_lock_stats.isra.16+0xe/0x40
[ 1339.484171]  [81138b50] ? insert_kthread_work+0x90/0x90
[ 1339.485886]  [83aedebc] ret_from_fork+0x7c/0xb0
[ 1339.487943]  [81138b50] ? insert_kthread_work+0x90/0x90
[ 1339.490435] ---[ end trace e01a8b0af77894c4 ]---

I'm guessing it happens because we never cancel the scheduled work when we
free the buffer, so the scheduled work may run even after we freed the buffer.

Besides the warning itself, I think that 'tty is NULL' would need a newline
after it. Greg, should I send a patch for that?


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [BUG] lkvm crash on crashkernel boot

2012-10-25 Thread Sasha Levin

On Thu, Oct 25, 2012 at 8:16 AM, Kirill A. Shutemov
kirill.shute...@linux.intel.com wrote:
 On Thu, Oct 25, 2012 at 10:17:27AM +0300, Pekka Enberg wrote:
 On Wed, Oct 24, 2012 at 6:27 PM, Kirill A. Shutemov
 kirill.shute...@linux.intel.com wrote:
  Hi,
 
  I've tried to play with kexec using lkvm. Unfortunately, lkvm crashes when
  I try to switch to crashkernel.
 
  I use Linus tree + penberg/kvmtool/next + one x86 mm patch[1].
 
  Kernel is defconfig + kvmconfig. I use the same kernel image for system and
  crash env.
 
  Host:
 
  % lkvm run --cpus 1 -m 1024 --params 'crashkernel=256M loglevel=8'
 
  Guest:
 
  # kexec -p bzImage --reuse-cmdline
  # echo c  /proc/sysrq-trigger
  ...
  [0.947984] loop: module loaded
  [0.950078] virtio-pci :00:01.0: irq 40 for MSI/MSI-X
  [0.950925] virtio-pci :00:01.0: irq 41 for MSI/MSI-X
  [0.952944] virtio-pci :00:01.0: irq 42 for MSI/MSI-X
  zsh: segmentation fault (core dumped)  lkvm run --cpus 1 -m 1024 --params 
  'crashkernel=256M loglevel=8'

 This seems to work OK on my machine.

  Guest kernel is somewhere in virtio_net initialization (for the second
  time). I'm too lazy to find exact line.
 
  Backtrace:
 
  0  irq__add_msix_route (kvm=kvm@entry=0xbf8010, msg=0xe3d090) at 
  x86/irq.c:210
  #1  0x0041b3bf in virtio_pci__specific_io_out.isra.5 
  (offset=optimized out,
  data=optimized out, kvm=0xbf8010) at virtio/pci.c:150
  #2  virtio_pci__io_out.9406 (ioport=optimized out, kvm=0xbf8010, 
  port=optimized out,
  data=optimized out, size=optimized out) at virtio/pci.c:208
  #3  0x0040f8c3 in kvm__emulate_io (count=optimized out, size=2, 
  direction=1,
  data=optimized out, port=25108, kvm=0xbf8010) at ioport.c:165
  #4  kvm_cpu__start (cpu=optimized out) at 
  x86/include/kvm/kvm-cpu-arch.h:41
  #5  0x00416ca2 in kvm_cpu_thread.2824 (arg=optimized out) at 
  builtin-run.c:176
  #6  0x7f701ebd0b50 in start_thread (arg=optimized out) at 
  pthread_create.c:304
  #7  0x7f701e1fe70d in clone () at 
  ../sysdeps/unix/sysv/linux/x86_64/clone.S:112
  #8  0x in ?? ()

 Looks like vpci-msix_table might not be initialized properly. Sasha,
 Asias, care to take a look at this?

 vec is 0x in virtio_pci__specific_io_out() on crash.

 Let's add proper bounds checking there. It doesn't solve the issue
 with booting crashkernel, but it fixes the lkvm crash.

 With the patch below I've got:

 [0.988004] NET: Registered protocol family 17
 [0.988550] 9pnet: Installing 9P2000 support
 [0.989006] virtio-pci :00:02.0: irq 40 for MSI/MSI-X
 [0.989889] virtio-pci :00:02.0: irq 41 for MSI/MSI-X
 [0.991117] virtio-pci :00:02.0: irq 40 for MSI/MSI-X
 [0.991716] virtio-pci :00:02.0: irq 41 for MSI/MSI-X
 [0.993028] 9pnet_virtio: probe of virtio1 failed with error -2
 [0.993811] virtio-pci :00:03.0: irq 40 for MSI/MSI-X
 [0.993895] virtio-pci :00:03.0: irq 41 for MSI/MSI-X
 [0.995186] virtio-pci :00:03.0: irq 40 for MSI/MSI-X
 [0.995899] virtio-pci :00:03.0: irq 41 for MSI/MSI-X
 [0.997030] 9pnet_virtio: probe of virtio2 failed with error -2
 [0.997891] Key type dns_resolver registered
 [0.998536] PM: Hibernation image not present or could not be loaded.
 [0.998902] registered taskstats version 1
 [1.001163]   Magic number: 0:241:128
 [1.001887] console [netcon0] enabled
 [1.002881] netconsole: network logging started
 [1.175863] Switching to clocksource tsc
 [   13.017445] ALSA device list:
 [   13.017834]   No soundcards found.
 [   13.018382] md: Waiting for all devices to be available before
 autodetect
 [   13.019090] md: If you don't use raid, use raid=noautodetect
 [   13.019867] md: Autodetecting RAID arrays.
 [   13.020280] md: Scanned 0 and added 0 devices.
 [   13.020728] md: autorun ...
 [   13.021008] md: ... autorun DONE.
 [   13.021405] 9pnet_virtio: no channels available
 [   13.021958] VFS: Cannot open root device root or unknown-block(0,0):
 error -2
 [   13.022749] Please append a correct root= boot option; here are the
 available partitions:
 [   13.023641] Kernel panic - not syncing: VFS: Unable to mount root fs on
 unknown-block(0,0)
 [   13.024462] Pid: 1, comm: swapper/0 Not tainted 3.7.0-rc2+ #20
 [   13.024638] Call Trace:
 [   13.024638]  [8174ae94] panic+0xb6/0x1b5
 [   13.024638]  [81cc7e0c] mount_block_root+0x183/0x221
 [   13.024638]  [81cc7fa4] mount_root+0xfa/0x105
 [   13.024638]  [81cc80ec] prepare_namespace+0x13d/0x16a
 [   13.024638]  [81729ee6] kernel_init+0x1c6/0x2e0
 [   13.024638]  [81cc75af] ? do_early_param+0x8c/0x8c
 [   13.024638]  [81729d20] ? rest_init+0x70/0x70
 [   13.024638]  [8175db2c] ret_from_fork+0x7c/0xb0
 [   13.024638]  [81729d20] ? rest_init+0x70/0x70
 [   13.024638] Rebooting in 1 seconds..  Warning: serial8250__exit failed.


   # KVM session ended normally.

tty, vt: lockdep warnings

2012-10-25 Thread Sasha Levin

Hi all,

While fuzzing with trinity inside a KVM tools (lkvm) guest running latest -next 
kernel,
I've stumbled on the following spew:

[  603.081796] ==
[  603.081797] [ INFO: possible circular locking dependency detected ]
[  603.081800] 3.7.0-rc2-next-20121025-sasha-1-g673f98e-dirty #77 Tainted: 
GW
[  603.081801] ---
[  603.081802] kworker/0:1/902 is trying to acquire lock:
[  603.081815]  ((fb_notifier_list).rwsem){.+.+.+}, at: [8114136f] 
__blocking_notifier_call_chain+0x7f/0xc0
[  603.081815]
[  603.081815] but task is already holding lock:
[  603.081822]  (console_lock){+.+.+.}, at: [81bc833e] 
console_callback+0xe/0x130
[  603.081823]
[  603.081823] which lock already depends on the new lock.
[  603.081823]
[  603.081824]
[  603.081824] the existing dependency chain (in reverse order) is:
[  603.081827]
[  603.081827] - #1 (console_lock){+.+.+.}:
[  603.081832][8118569a] lock_acquire+0x1aa/0x240
[  603.081837][8110b6e8] console_lock+0x68/0x70
[  603.081841][81bc786b] register_con_driver+0x1b/0x110
[  603.081844][81bc7a99] take_over_console+0x29/0x60
[  603.081848][81a6ee33] fbcon_takeover+0x63/0xc0
[  603.081851][81a724af] fbcon_event_notify+0x33f/0x730
[  603.081854][81140e8e] notifier_call_chain+0xee/0x130
[  603.081857][81141388] 
__blocking_notifier_call_chain+0x98/0xc0
[  603.081860][811413c1] 
blocking_notifier_call_chain+0x11/0x20
[  603.081863][81a635b6] fb_notifier_call_chain+0x16/0x20
[  603.081866][81a6638d] register_framebuffer+0x24d/0x290
[  603.081871][83961605] vga16fb_probe+0x1c0/0x227
[  603.081876][81e63192] platform_drv_probe+0x12/0x20
[  603.081879][81e61bb5] driver_probe_device+0x155/0x340
[  603.081881][81e61e7e] __device_attach+0x2e/0x50
[  603.081884][81e5fff6] bus_for_each_drv+0x56/0xa0
[  603.081887][81e617a8] device_attach+0x88/0xc0
[  603.081889][81e60236] bus_probe_device+0x36/0xd0
[  603.081892][81e5e0bf] device_add+0x4df/0x750
[  603.081895][81e63aa8] platform_device_add+0x1e8/0x280
[  603.081900][85b11144] vga16fb_init+0x8d/0xbb
[  603.081905][85acecb2] do_one_initcall+0x7a/0x135
[  603.081908][8392eb19] kernel_init+0x299/0x470
[  603.081912][83aedebc] ret_from_fork+0x7c/0xb0
[  603.081915]
[  603.081915] - #0 ((fb_notifier_list).rwsem){.+.+.+}:
[  603.081918][811828df] __lock_acquire+0x14df/0x1ca0
[  603.081921][8118569a] lock_acquire+0x1aa/0x240
[  603.081924][83aea187] down_read+0x47/0x90
[  603.081927][8114136f] 
__blocking_notifier_call_chain+0x7f/0xc0
[  603.081930][811413c1] 
blocking_notifier_call_chain+0x11/0x20
[  603.081932][81a635b6] fb_notifier_call_chain+0x16/0x20
[  603.081934][81a65306] fb_blank+0x36/0xa0
[  603.081938][81a729ee] fbcon_blank+0x14e/0x2d0
[  603.081941][81bc7c93] do_blank_screen+0x1b3/0x2b0
[  603.081943][81bc83f3] console_callback+0xc3/0x130
[  603.081946][8112d5a9] process_one_work+0x3b9/0x770
[  603.081949][8112df2a] worker_thread+0x2ba/0x3f0
[  603.081951][81138c33] kthread+0xe3/0xf0
[  603.081954][83aedebc] ret_from_fork+0x7c/0xb0
[  603.081955]
[  603.081955] other info that might help us debug this:
[  603.081955]
[  603.081956]  Possible unsafe locking scenario:
[  603.081956]
[  603.081956]CPU0CPU1
[  603.081957]
[  603.081959]   lock(console_lock);
[  603.081961]lock((fb_notifier_list).rwsem);
[  603.081962]lock(console_lock);
[  603.081964]   lock((fb_notifier_list).rwsem);
[  603.081964]
[  603.081964]  *** DEADLOCK ***
[  603.081964]
[  603.081966] 3 locks held by kworker/0:1/902:
[  603.081971]  #0:  (events){.+.+.+}, at: [8112d458] 
process_one_work+0x268/0x770
[  603.081976]  #1:  (console_work){+.+...}, at: [8112d458] 
process_one_work+0x268/0x770
[  603.081981]  #2:  (console_lock){+.+.+.}, at: [81bc833e] 
console_callback+0xe/0x130
[  603.081981]
[  603.081981] stack backtrace:
[  603.081984] Pid: 902, comm: kworker/0:1 Tainted: GW
3.7.0-rc2-next-20121025-sasha-1-g673f98e-dirty #77
[  603.081985] Call Trace:
[  603.081990]  [83a90609] print_circular_bug+0x1fb/0x20c
[  603.081994]  [811828df] __lock_acquire+0x14df/0x1ca0
[  603.081997]  [8117cfda] ? __bfs+0x16a/0x220
[  603.082000]  [8118569a]

Re: mm: NULL ptr deref in anon_vma_interval_tree_verify

2012-10-25 Thread Sasha Levin

On 10/18/2012 06:46 PM, Sasha Levin wrote:
 Hi all,
 
 While fuzzing with trinity inside a KVM tools (lkvm) guest, on today's 
 linux-next kernel,
 I saw the following:
 
 [ 1857.278176] BUG: unable to handle kernel NULL pointer dereference at 
 0090
 [ 1857.283725] IP: [81229d0f] anon_vma_interval_tree_verify+0xf/0xa0
 [ 1857.283725] PGD 6e19e067 PUD 6e19f067 PMD 0
 [ 1857.283725] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 [ 1857.283725] Dumping ftrace buffer:
 [ 1857.283725](ftrace buffer empty)
 [ 1857.283725] CPU 2
 [ 1857.283725] Pid: 15637, comm: trinity-child18 Tainted: GW
 3.7.0-rc1-next-20121018-sasha-2-g60a870d-dirty #61
 [ 1857.283725] RIP: 0010:[81229d0f]  [81229d0f] 
 anon_vma_interval_tree_verify+0xf/0xa0
 [ 1857.283725] RSP: 0018:88007df0fce8  EFLAGS: 00010296
 [ 1857.283725] RAX: 880089db1000 RBX: 880089db0ff0 RCX: 
 8800869e6928
 [ 1857.283725] RDX:  RSI: 880089db1008 RDI: 
 880089db0ff0
 [ 1857.283725] RBP: 88007df0fcf8 R08: 88006427d508 R09: 
 88012bb95f20
 [ 1857.283725] R10: 0001 R11: 8800c8525c60 R12: 
 88006e199370
 [ 1857.283725] R13: 88006e199300 R14:  R15: 
 880089db1000
 [ 1857.283725] FS:  7f322fd4c700() GS:88004d60() 
 knlGS:
 [ 1857.283725] CS:  0010 DS:  ES:  CR0: 80050033
 [ 1857.283725] CR2: 0090 CR3: 6e19d000 CR4: 
 000406e0
 [ 1857.283725] DR0:  DR1:  DR2: 
 
 [ 1857.283725] DR3:  DR6: 0ff0 DR7: 
 0400
 [ 1857.283725] Process trinity-child18 (pid: 15637, threadinfo 
 88007df0e000, task 88007ac8)
 [ 1857.283725] Stack:
 [ 1857.283725]  88007df0fd38 880089db0ff0 88007df0fd48 
 81233b58
 [ 1857.283725]  88007df0fd38 880089db1000 80d0 
 880089db1000
 [ 1857.283725]  88012bb95f20 885d97c8 885d97d8 
 880089db1000
 [ 1857.283725] Call Trace:
 [ 1857.283725]  [81233b58] validate_mm+0x58/0x1e0
 [ 1857.283725]  [81233da4] vma_link+0x94/0xe0
 [ 1857.283725]  [83a67fd4] ? _raw_spin_unlock_irqrestore+0x84/0xb0
 [ 1857.283725]  [81235f75] mmap_region+0x3f5/0x5c0
 [ 1857.283725]  [812363f7] do_mmap_pgoff+0x2b7/0x330
 [ 1857.283725]  [81220fd1] ? vm_mmap_pgoff+0x61/0xa0
 [ 1857.283725]  [81220fea] vm_mmap_pgoff+0x7a/0xa0
 [ 1857.283725]  [81234c72] sys_mmap_pgoff+0x182/0x1a0
 [ 1857.283725]  [8107dc40] ? syscall_trace_enter+0x20/0x2e0
 [ 1857.283725]  [810738dd] sys_mmap+0x1d/0x20
 [ 1857.283725]  [83a69ad8] tracesys+0xe1/0xe6
 [ 1857.283725] Code: 48 39 ce 77 9e f3 c3 0f 1f 44 00 00 31 c0 c3 66 66 66 66 
 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 53 48 89 fb
 48 83 ec 08 48 8b 17 48 8b 8a 90 00 00 00 48 39 4f 40 74 34 80 3d a6 82 5b 
 04 00 75
 [ 1857.283725] RIP  [81229d0f] 
 anon_vma_interval_tree_verify+0xf/0xa0
 [ 1857.283725]  RSP 88007df0fce8
 [ 1857.283725] CR2: 0090
 [ 1858.611277] ---[ end trace b51cc425e9b07fc0 ]---
 
 The obvious part is that anon_vma_interval_tree_verify() got called with node 
 == NULL, but when
 looking at the caller:
 
 list_for_each_entry(avc, vma-anon_vma_chain, same_vma)
 anon_vma_interval_tree_verify(avc);
 
 How it got called with said NULL becomes less obvious.

I've hit a similar one with today's -next. It isn't exactly the same, but
I suspect it's the same issue.

[ 1523.657950] BUG: unable to handle kernel paging request at fff0
[ 1523.660022] IP: [8122c29c] anon_vma_interval_tree_verify+0xc/0xa0
[ 1523.660022] PGD 4e28067 PUD 4e29067 PMD 0
[ 1523.675725] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
[ 1523.750066] CPU 0
[ 1523.750066] Pid: 9050, comm: trinity-child64 Tainted: GW
3.7.0-rc2-next-20121025-sasha-1-g673f98e-dirty #77
[ 1523.750066] RIP: 0010:[8122c29c]  [8122c29c] 
anon_vma_interval_tree_verify+0xc/0xa0
[ 1523.750066] RSP: 0018:880045f81d48  EFLAGS: 00010296
[ 1523.750066] RAX:  RBX: fff0 RCX: 
[ 1523.750066] RDX:  RSI: 0001 RDI: fff0
[ 1523.750066] RBP: 880045f81d58 R08:  R09: 0f14
[ 1523.750066] R10: 0f12 R11:  R12: 8800096c8d70
[ 1523.750066] R13: 8800096c8d00 R14:  R15: 8800095b45e0
[ 1523.750066] FS:  7f7a923f3700() GS:88001360() 
knlGS:
[ 1523.750066] CS:  0010 DS:  ES:  CR0: 80050033
[ 1523.750066] CR2: fff0 CR3: 0969d000 CR4: 000406f0
[ 1523.750066] DR0:  DR1:  DR2: 
[ 1523.750066] DR3:  DR6: 0ff0 DR7

ipc, msgqueue: NULL ptr deref in msgrcv

2012-10-25 Thread Sasha Levin

Hi all,

While fuzzing with trinity inside a KVM tools (lkvm) guest running latest -next,
I've stumbled on the following:

[   80.110944] NULL pointer dereference at 0011
[   80.110944] IP: [8190cf90] testmsg.isra.5+0x40/0x70
[   80.110944] PGD 23812067 PUD 23811067 PMD 0
[   80.110944] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
[   80.110944] Dumping ftrace buffer:
[   80.110944](ftrace buffer empty)
[   80.110944] CPU 1
[   80.110944] Pid: 6452, comm: trinity-child84 Tainted: GW
3.7.0-rc2-next-20121025-sasha-1-g673f98e-dirty #77
[   80.110944] RIP: 0010:[8190cf90]  [8190cf90] 
testmsg.isra.5+0x40/0x70
[   80.110944] RSP: 0018:88004dda9e78  EFLAGS: 00010246
[   80.110944] RAX:  RBX: 0002 RCX: 000f
[   80.110944] RDX: 0002 RSI: 282c796a RDI: 0011
[   80.110944] RBP: 88004dda9e78 R08:  R09: 
[   80.110944] R10: 0001 R11: 0001 R12: 282c796a
[   80.110944] R13: 8800621ab7c0 R14: 0001 R15: 8800621ab710
[   80.110944] FS:  7f777fd4d700() GS:88002780() 
knlGS:
[   80.110944] CS:  0010 DS:  ES:  CR0: 80050033
[   80.110944] CR2: 0011 CR3: 24bf9000 CR4: 000406e0
[   80.110944] DR0:  DR1:  DR2: 
[   80.110944] DR3:  DR6: 0ff0 DR7: 0400
[   80.110944] Process trinity-child84 (pid: 6452, threadinfo 88004dda8000, 
task 88004dd9b000)
[   80.110944] Stack:
[   80.110944]  88004dda9f68 8190e2ce 001d7840 
88004dd9b000
[   80.110944]  88004dd9b000 88004dd9b000  
8190cda0
[   80.110944]   854e3160  

[   80.110944] Call Trace:
[   80.110944]  [8190e2ce] do_msgrcv+0x1ae/0x640
[   80.110944]  [8190cda0] ? load_msg+0x190/0x190
[   80.110944]  [8107dc30] ? syscall_trace_enter+0x20/0x2e0
[   80.110944]  [8190e770] sys_msgrcv+0x10/0x20
[   80.110944]  [83aee198] tracesys+0xe1/0xe6
[   80.110944] Code: eb 51 66 0f 1f 84 00 00 00 00 00 83 fa 03 74 2b 83 fa 04 
75 2e 48 39 37 b8 01 00 00 00 7f 24 eb 32 66 2e 0f
1f 84 00 00 00 00 00 48 3b 37 75 13 eb 19 66 0f 1f 84 00 00 00 00 00 48 3b 37 
75 0b
[   80.110944] RIP  [8190cf90] testmsg.isra.5+0x40/0x70
[   80.110944]  RSP 88004dda9e78
[   80.110944] CR2: 0011
[   80.246961] ---[ end trace 1d24d488413d3159 ]---

case SEARCH_EQUAL:
if (msg-m_type == type) --- here
return 1;

My guess is that we managed to get testmsg() called without INIT_LIST_HEAD() 
somehow,
but I'm still not sure why.


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: ipc, msgqueue: NULL ptr deref in msgrcv

2012-10-27 Thread Sasha Levin

On 10/25/2012 08:43 PM, Sasha Levin wrote:
 Hi all,
 
 While fuzzing with trinity inside a KVM tools (lkvm) guest running latest 
 -next,
 I've stumbled on the following:
 
 [   80.110944] NULL pointer dereference at 0011
 [   80.110944] IP: [8190cf90] testmsg.isra.5+0x40/0x70
 [   80.110944] PGD 23812067 PUD 23811067 PMD 0
 [   80.110944] Oops:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
 [   80.110944] Dumping ftrace buffer:
 [   80.110944](ftrace buffer empty)
 [   80.110944] CPU 1
 [   80.110944] Pid: 6452, comm: trinity-child84 Tainted: GW
 3.7.0-rc2-next-20121025-sasha-1-g673f98e-dirty #77
 [   80.110944] RIP: 0010:[8190cf90]  [8190cf90] 
 testmsg.isra.5+0x40/0x70
 [   80.110944] RSP: 0018:88004dda9e78  EFLAGS: 00010246
 [   80.110944] RAX:  RBX: 0002 RCX: 
 000f
 [   80.110944] RDX: 0002 RSI: 282c796a RDI: 
 0011
 [   80.110944] RBP: 88004dda9e78 R08:  R09: 
 
 [   80.110944] R10: 0001 R11: 0001 R12: 
 282c796a
 [   80.110944] R13: 8800621ab7c0 R14: 0001 R15: 
 8800621ab710
 [   80.110944] FS:  7f777fd4d700() GS:88002780() 
 knlGS:
 [   80.110944] CS:  0010 DS:  ES:  CR0: 80050033
 [   80.110944] CR2: 0011 CR3: 24bf9000 CR4: 
 000406e0
 [   80.110944] DR0:  DR1:  DR2: 
 
 [   80.110944] DR3:  DR6: 0ff0 DR7: 
 0400
 [   80.110944] Process trinity-child84 (pid: 6452, threadinfo 
 88004dda8000, task 88004dd9b000)
 [   80.110944] Stack:
 [   80.110944]  88004dda9f68 8190e2ce 001d7840 
 88004dd9b000
 [   80.110944]  88004dd9b000 88004dd9b000  
 8190cda0
 [   80.110944]   854e3160  
 
 [   80.110944] Call Trace:
 [   80.110944]  [8190e2ce] do_msgrcv+0x1ae/0x640
 [   80.110944]  [8190cda0] ? load_msg+0x190/0x190
 [   80.110944]  [8107dc30] ? syscall_trace_enter+0x20/0x2e0
 [   80.110944]  [8190e770] sys_msgrcv+0x10/0x20
 [   80.110944]  [83aee198] tracesys+0xe1/0xe6
 [   80.110944] Code: eb 51 66 0f 1f 84 00 00 00 00 00 83 fa 03 74 2b 83 fa 04 
 75 2e 48 39 37 b8 01 00 00 00 7f 24 eb 32 66 2e 0f
 1f 84 00 00 00 00 00 48 3b 37 75 13 eb 19 66 0f 1f 84 00 00 00 00 00 48 3b 
 37 75 0b
 [   80.110944] RIP  [8190cf90] testmsg.isra.5+0x40/0x70
 [   80.110944]  RSP 88004dda9e78
 [   80.110944] CR2: 0011
 [   80.246961] ---[ end trace 1d24d488413d3159 ]---
 
   case SEARCH_EQUAL:
   if (msg-m_type == type) --- here
   return 1;
 
 My guess is that we managed to get testmsg() called without INIT_LIST_HEAD() 
 somehow,
 but I'm still not sure why.
 
 
 Thanks,
 Sasha
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
 

My auto-bisect script just finished running. Looks like it's caused by:

e32811fe04c759faf63c93b470e787b2328ceb04 is the first bad commit
commit e32811fe04c759faf63c93b470e787b2328ceb04
Author: Stanislav Kinsbursky skinsbur...@parallels.com
Date:   Thu Oct 25 12:15:18 2012 +1100

ipc: message queue copy feature introduced


Thanks,
Sasha
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 01/16] hashtable: introduce a small and naive hashtable

2012-10-28 Thread Sasha Levin

This hashtable implementation is using hlist buckets to provide a simple
hashtable to prevent it from getting reimplemented all over the kernel.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---

Sorry for the long delay, I was busy with a bunch of personal things.

Changes since v6:

 - Use macros that point to internal static inline functions instead of
 implementing everything as a macro.
 - Rebase on latest -next.
 - Resending the entire patch series on request.
 - Break early from hash_empty() if found to be non-empty.
 - DECLARE_HASHTABLE/DEFINE_HASHTABLE.


 include/linux/hashtable.h | 193 ++
 1 file changed, 193 insertions(+)
 create mode 100644 include/linux/hashtable.h

diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
new file mode 100644
index 000..1fb8c97
--- /dev/null
+++ b/include/linux/hashtable.h
@@ -0,0 +1,193 @@
+/*
+ * Statically sized hash table implementation
+ * (C) 2012  Sasha Levin levinsasha...@gmail.com
+ */
+
+#ifndef _LINUX_HASHTABLE_H
+#define _LINUX_HASHTABLE_H
+
+#include linux/list.h
+#include linux/types.h
+#include linux/kernel.h
+#include linux/hash.h
+#include linux/rculist.h
+
+#define DEFINE_HASHTABLE(name, bits)   
\
+   struct hlist_head name[1  bits] = 
\
+   { [0 ... ((1  bits) - 1)] = HLIST_HEAD_INIT }
+
+#define DECLARE_HASHTABLE(name, bits)  
\
+   struct hlist_head name[1  (bits)]
+
+#define HASH_SIZE(name) (ARRAY_SIZE(name))
+#define HASH_BITS(name) ilog2(HASH_SIZE(name))
+
+/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. 
*/
+#define hash_min(val, bits)
\
+({ 
\
+   sizeof(val) = 4 ?  
\
+   hash_32(val, bits) :
\
+   hash_long(val, bits);   
\
+})
+
+static inline void __hash_init(struct hlist_head *ht, int sz)
+{
+   int i;
+
+   for (i = 0; i  sz; i++)
+   INIT_HLIST_HEAD(ht[sz]);
+}
+
+/**
+ * hash_init - initialize a hash table
+ * @hashtable: hashtable to be initialized
+ *
+ * Calculates the size of the hashtable from the given parameter, otherwise
+ * same as hash_init_size.
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_add - add an object to a hashtable
+ * @hashtable: hashtable to add to
+ * @node: the struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add(hashtable, node, key) 
\
+   hlist_add_head(node, hashtable[hash_min(key, HASH_BITS(hashtable))]);
+
+/**
+ * hash_add_rcu - add an object to a rcu enabled hashtable
+ * @hashtable: hashtable to add to
+ * @node: the struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add_rcu(hashtable, node, key) 
\
+   hlist_add_head_rcu(node, hashtable[hash_min(key, 
HASH_BITS(hashtable))]);
+
+/**
+ * hash_hashed - check whether an object is in any hashtable
+ * @node: the struct hlist_node of the object to be checked
+ */
+#define hash_hashed(node) (!hlist_unhashed(node))
+
+static inline bool __hash_empty(struct hlist_head *ht, int sz)
+{
+   int i;
+
+   for (i = 0; i  sz; i++)
+   if (!hlist_empty(ht[i]))
+   return false;
+
+   return true;
+}
+
+/**
+ * hash_empty - check whether a hashtable is empty
+ * @hashtable: hashtable to check
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_del - remove an object from a hashtable
+ * @node: struct hlist_node of the object to remove
+ */
+static inline void hash_del(struct hlist_node *node)
+{
+   hlist_del_init(node);
+}
+
+/**
+ * hash_del_rcu - remove an object from a rcu enabled hashtable
+ * @node: struct hlist_node of the object to remove
+ */
+static inline void hash_del_rcu(struct hlist_node *node)
+{
+   hlist_del_init_rcu(node);
+}
+
+/**
+ * hash_for_each - iterate over a hashtable
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @node: the struct list_head to use as a loop cursor for each entry
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each(name, bkt, node, obj, member

[PATCH v7 04/16] workqueue: use new hashtable implementation

2012-10-28 Thread Sasha Levin

Switch workqueues to use the new hashtable implementation. This reduces the 
amount of
generic unrelated code in the workqueues.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 kernel/workqueue.c | 86 ++
 1 file changed, 15 insertions(+), 71 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a1135c6..8f6e1bf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,6 +41,7 @@
 #include linux/debug_locks.h
 #include linux/lockdep.h
 #include linux/idr.h
+#include linux/hashtable.h
 
 #include workqueue_sched.h
 
@@ -82,8 +83,6 @@ enum {
NR_WORKER_POOLS = 2,/* # worker pools per gcwq */
 
BUSY_WORKER_HASH_ORDER  = 6,/* 64 pointers */
-   BUSY_WORKER_HASH_SIZE   = 1  BUSY_WORKER_HASH_ORDER,
-   BUSY_WORKER_HASH_MASK   = BUSY_WORKER_HASH_SIZE - 1,
 
MAX_IDLE_WORKERS_RATIO  = 4,/* 1/4 of busy can be idle */
IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
@@ -180,7 +179,7 @@ struct global_cwq {
unsigned intflags;  /* L: GCWQ_* flags */
 
/* workers are chained either in busy_hash or pool idle_list */
-   struct hlist_head   busy_hash[BUSY_WORKER_HASH_SIZE];
+   DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
/* L: hash of busy workers */
 
struct worker_pool  pools[NR_WORKER_POOLS];
@@ -285,8 +284,7 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
 (pool)  (gcwq)-pools[NR_WORKER_POOLS]; (pool)++)
 
 #define for_each_busy_worker(worker, i, pos, gcwq) \
-   for (i = 0; i  BUSY_WORKER_HASH_SIZE; i++) \
-   hlist_for_each_entry(worker, pos, gcwq-busy_hash[i], hentry)
+   hash_for_each(gcwq-busy_hash, i, pos, worker, hentry)
 
 static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
  unsigned int sw)
@@ -857,63 +855,6 @@ static inline void worker_clr_flags(struct worker *worker, 
unsigned int flags)
 }
 
 /**
- * busy_worker_head - return the busy hash head for a work
- * @gcwq: gcwq of interest
- * @work: work to be hashed
- *
- * Return hash head of @gcwq for @work.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq-lock).
- *
- * RETURNS:
- * Pointer to the hash head.
- */
-static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
-  struct work_struct *work)
-{
-   const int base_shift = ilog2(sizeof(struct work_struct));
-   unsigned long v = (unsigned long)work;
-
-   /* simple shift and fold hash, do we need something better? */
-   v = base_shift;
-   v += v  BUSY_WORKER_HASH_ORDER;
-   v = BUSY_WORKER_HASH_MASK;
-
-   return gcwq-busy_hash[v];
-}
-
-/**
- * __find_worker_executing_work - find worker which is executing a work
- * @gcwq: gcwq of interest
- * @bwh: hash head as returned by busy_worker_head()
- * @work: work to find worker for
- *
- * Find a worker which is executing @work on @gcwq.  @bwh should be
- * the hash head obtained by calling busy_worker_head() with the same
- * work.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq-lock).
- *
- * RETURNS:
- * Pointer to worker which is executing @work if found, NULL
- * otherwise.
- */
-static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
-  struct hlist_head *bwh,
-  struct work_struct *work)
-{
-   struct worker *worker;
-   struct hlist_node *tmp;
-
-   hlist_for_each_entry(worker, tmp, bwh, hentry)
-   if (worker-current_work == work)
-   return worker;
-   return NULL;
-}
-
-/**
  * find_worker_executing_work - find worker which is executing a work
  * @gcwq: gcwq of interest
  * @work: work to find worker for
@@ -932,8 +873,14 @@ static struct worker *__find_worker_executing_work(struct 
global_cwq *gcwq,
 static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
 struct work_struct *work)
 {
-   return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
-   work);
+   struct worker *worker;
+   struct hlist_node *tmp;
+
+   hash_for_each_possible(gcwq-busy_hash, worker, tmp, hentry, (unsigned 
long)work)
+   if (worker-current_work == work)
+   return worker;
+
+   return NULL;
 }
 
 /**
@@ -2160,7 +2107,6 @@ __acquires(gcwq-lock)
struct cpu_workqueue_struct *cwq = get_work_cwq(work);
struct worker_pool *pool = worker-pool;
struct global_cwq *gcwq = pool-gcwq;
-   struct hlist_head *bwh = busy_worker_head(gcwq, work);
bool cpu_intensive = cwq-wq-flags  WQ_CPU_INTENSIVE;
work_func_t f = work-func

[PATCH v7 05/16] mm/huge_memory: use new hashtable implementation

2012-10-28 Thread Sasha Levin

Switch hugemem to use the new hashtable implementation. This reduces the
amount of generic unrelated code in hugemem.

This also removes the dynamic allocation of the hash table. The size of the
table is constant, so there's no point in paying the price of an extra
dereference when accessing it.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 mm/huge_memory.c | 55 ++-
 1 file changed, 14 insertions(+), 41 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3c14a96..38ce8e9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -19,6 +19,7 @@
 #include linux/mman.h
 #include linux/pagemap.h
 #include linux/migrate.h
+#include linux/hashtable.h
 #include asm/tlb.h
 #include asm/pgalloc.h
 #include internal.h
@@ -59,12 +60,12 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
 static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
 
 static int khugepaged(void *none);
-static int mm_slots_hash_init(void);
 static int khugepaged_slab_init(void);
 static void khugepaged_slab_free(void);
 
-#define MM_SLOTS_HASH_HEADS 1024
-static struct hlist_head *mm_slots_hash __read_mostly;
+#define MM_SLOTS_HASH_BITS 10
+static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
+
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
 /**
@@ -545,12 +546,6 @@ static int __init hugepage_init(void)
if (err)
goto out;
 
-   err = mm_slots_hash_init();
-   if (err) {
-   khugepaged_slab_free();
-   goto out;
-   }
-
/*
 * By default disable transparent hugepages on smaller systems,
 * where the extra memory used could hurt more than TLB overhead
@@ -1673,6 +1668,8 @@ static int __init khugepaged_slab_init(void)
if (!mm_slot_cache)
return -ENOMEM;
 
+   hash_init(mm_slots_hash);
+
return 0;
 }
 
@@ -1694,47 +1691,23 @@ static inline void free_mm_slot(struct mm_slot *mm_slot)
kmem_cache_free(mm_slot_cache, mm_slot);
 }
 
-static int __init mm_slots_hash_init(void)
-{
-   mm_slots_hash = kzalloc(MM_SLOTS_HASH_HEADS * sizeof(struct hlist_head),
-   GFP_KERNEL);
-   if (!mm_slots_hash)
-   return -ENOMEM;
-   return 0;
-}
-
-#if 0
-static void __init mm_slots_hash_free(void)
-{
-   kfree(mm_slots_hash);
-   mm_slots_hash = NULL;
-}
-#endif
-
 static struct mm_slot *get_mm_slot(struct mm_struct *mm)
 {
-   struct mm_slot *mm_slot;
-   struct hlist_head *bucket;
+   struct mm_slot *slot;
struct hlist_node *node;
 
-   bucket = mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
-   % MM_SLOTS_HASH_HEADS];
-   hlist_for_each_entry(mm_slot, node, bucket, hash) {
-   if (mm == mm_slot-mm)
-   return mm_slot;
-   }
+   hash_for_each_possible(mm_slots_hash, slot, node, hash, (unsigned long) 
mm)
+   if (slot-mm == mm)
+   return slot;
+
return NULL;
 }
 
 static void insert_to_mm_slots_hash(struct mm_struct *mm,
struct mm_slot *mm_slot)
 {
-   struct hlist_head *bucket;
-
-   bucket = mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
-   % MM_SLOTS_HASH_HEADS];
mm_slot-mm = mm;
-   hlist_add_head(mm_slot-hash, bucket);
+   hash_add(mm_slots_hash, mm_slot-hash, (long)mm);
 }
 
 static inline int khugepaged_test_exit(struct mm_struct *mm)
@@ -1803,7 +1776,7 @@ void __khugepaged_exit(struct mm_struct *mm)
spin_lock(khugepaged_mm_lock);
mm_slot = get_mm_slot(mm);
if (mm_slot  khugepaged_scan.mm_slot != mm_slot) {
-   hlist_del(mm_slot-hash);
+   hash_del(mm_slot-hash);
list_del(mm_slot-mm_node);
free = 1;
}
@@ -2252,7 +2225,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 
if (khugepaged_test_exit(mm)) {
/* free mm_slot */
-   hlist_del(mm_slot-hash);
+   hash_del(mm_slot-hash);
list_del(mm_slot-mm_node);
 
/*
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 07/16] net,9p: use new hashtable implementation

2012-10-28 Thread Sasha Levin

Switch the 9p error table to use the new hashtable implementation. This
reduces the amount of generic unrelated code in 9p.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/9p/error.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/net/9p/error.c b/net/9p/error.c
index 2ab2de7..a5cc7dd 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -34,7 +34,7 @@
 #include linux/jhash.h
 #include linux/errno.h
 #include net/9p/9p.h
-
+#include linux/hashtable.h
 /**
  * struct errormap - map string errors from Plan 9 to Linux numeric ids
  * @name: string sent over 9P
@@ -50,8 +50,8 @@ struct errormap {
struct hlist_node list;
 };
 
-#define ERRHASHSZ  32
-static struct hlist_head hash_errmap[ERRHASHSZ];
+#define ERR_HASH_BITS 5
+static DEFINE_HASHTABLE(hash_errmap, ERR_HASH_BITS);
 
 /* FixMe - reduce to a reasonable size */
 static struct errormap errmap[] = {
@@ -193,18 +193,17 @@ static struct errormap errmap[] = {
 int p9_error_init(void)
 {
struct errormap *c;
-   int bucket;
+   u32 hash;
 
/* initialize hash table */
-   for (bucket = 0; bucket  ERRHASHSZ; bucket++)
-   INIT_HLIST_HEAD(hash_errmap[bucket]);
+   hash_init(hash_errmap);
 
/* load initial error map into hash table */
for (c = errmap; c-name != NULL; c++) {
c-namelen = strlen(c-name);
-   bucket = jhash(c-name, c-namelen, 0) % ERRHASHSZ;
+   hash = jhash(c-name, c-namelen, 0);
INIT_HLIST_NODE(c-list);
-   hlist_add_head(c-list, hash_errmap[bucket]);
+   hash_add(hash_errmap, c-list, hash);
}
 
return 1;
@@ -223,13 +222,13 @@ int p9_errstr2errno(char *errstr, int len)
int errno;
struct hlist_node *p;
struct errormap *c;
-   int bucket;
+   u32 hash;
 
errno = 0;
p = NULL;
c = NULL;
-   bucket = jhash(errstr, len, 0) % ERRHASHSZ;
-   hlist_for_each_entry(c, p, hash_errmap[bucket], list) {
+   hash = jhash(errstr, len, 0);
+   hash_for_each_possible(hash_errmap, c, p, list, hash) {
if (c-namelen == len  !memcmp(c-name, errstr, len)) {
errno = c-val;
break;
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 08/16] block,elevator: use new hashtable implementation

2012-10-28 Thread Sasha Levin

Switch elevator to use the new hashtable implementation. This reduces the
amount of generic unrelated code in the elevator.

This also removes the dynamic allocation of the hash table. The size of the
table is constant, so there's no point in paying the price of an extra
dereference when accessing it.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 block/blk.h  |  2 +-
 block/elevator.c | 23 ---
 include/linux/elevator.h |  5 -
 3 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/block/blk.h b/block/blk.h
index ca51543..a0abbf6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -61,7 +61,7 @@ static inline void blk_clear_rq_complete(struct request *rq)
 /*
  * Internal elevator interface
  */
-#define ELV_ON_HASH(rq)(!hlist_unhashed((rq)-hash))
+#define ELV_ON_HASH(rq) hash_hashed((rq)-hash)
 
 void blk_insert_flush(struct request *rq);
 void blk_abort_flushes(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index 9b1d42b..898d0eb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -46,11 +46,6 @@ static LIST_HEAD(elv_list);
 /*
  * Merge hash stuff.
  */
-static const int elv_hash_shift = 6;
-#define ELV_HASH_BLOCK(sec)((sec)  3)
-#define ELV_HASH_FN(sec)   \
-   (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift))
-#define ELV_HASH_ENTRIES   (1  elv_hash_shift)
 #define rq_hash_key(rq)(blk_rq_pos(rq) + blk_rq_sectors(rq))
 
 /*
@@ -142,7 +137,6 @@ static struct elevator_queue *elevator_alloc(struct 
request_queue *q,
  struct elevator_type *e)
 {
struct elevator_queue *eq;
-   int i;
 
eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q-node);
if (unlikely(!eq))
@@ -151,14 +145,7 @@ static struct elevator_queue *elevator_alloc(struct 
request_queue *q,
eq-type = e;
kobject_init(eq-kobj, elv_ktype);
mutex_init(eq-sysfs_lock);
-
-   eq-hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES,
-   GFP_KERNEL, q-node);
-   if (!eq-hash)
-   goto err;
-
-   for (i = 0; i  ELV_HASH_ENTRIES; i++)
-   INIT_HLIST_HEAD(eq-hash[i]);
+   hash_init(eq-hash);
 
return eq;
 err:
@@ -173,7 +160,6 @@ static void elevator_release(struct kobject *kobj)
 
e = container_of(kobj, struct elevator_queue, kobj);
elevator_put(e-type);
-   kfree(e-hash);
kfree(e);
 }
 
@@ -240,7 +226,7 @@ EXPORT_SYMBOL(elevator_exit);
 
 static inline void __elv_rqhash_del(struct request *rq)
 {
-   hlist_del_init(rq-hash);
+   hash_del(rq-hash);
 }
 
 static void elv_rqhash_del(struct request_queue *q, struct request *rq)
@@ -254,7 +240,7 @@ static void elv_rqhash_add(struct request_queue *q, struct 
request *rq)
struct elevator_queue *e = q-elevator;
 
BUG_ON(ELV_ON_HASH(rq));
-   hlist_add_head(rq-hash, e-hash[ELV_HASH_FN(rq_hash_key(rq))]);
+   hash_add(e-hash, rq-hash, rq_hash_key(rq));
 }
 
 static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
@@ -266,11 +252,10 @@ static void elv_rqhash_reposition(struct request_queue 
*q, struct request *rq)
 static struct request *elv_rqhash_find(struct request_queue *q, sector_t 
offset)
 {
struct elevator_queue *e = q-elevator;
-   struct hlist_head *hash_list = e-hash[ELV_HASH_FN(offset)];
struct hlist_node *entry, *next;
struct request *rq;
 
-   hlist_for_each_entry_safe(rq, entry, next, hash_list, hash) {
+   hash_for_each_possible_safe(e-hash, rq, entry, next, hash, offset) {
BUG_ON(!ELV_ON_HASH(rq));
 
if (unlikely(!rq_mergeable(rq))) {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index c03af76..7587f7f 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -2,6 +2,7 @@
 #define _LINUX_ELEVATOR_H
 
 #include linux/percpu.h
+#include linux/hashtable.h
 
 #ifdef CONFIG_BLOCK
 
@@ -96,6 +97,8 @@ struct elevator_type
struct list_head list;
 };
 
+#define ELV_HASH_BITS 6
+
 /*
  * each queue has an elevator_queue associated with it
  */
@@ -105,7 +108,7 @@ struct elevator_queue
void *elevator_data;
struct kobject kobj;
struct mutex sysfs_lock;
-   struct hlist_head *hash;
+   DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
unsigned int registered:1;
 };
 
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 09/16] SUNRPC/cache: use new hashtable implementation

2012-10-28 Thread Sasha Levin

Switch cache to use the new hashtable implementation. This reduces the amount of
generic unrelated code in the cache implementation.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/sunrpc/cache.c | 20 +---
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index fc2f7aa..0490546 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -28,6 +28,7 @@
 #include linux/workqueue.h
 #include linux/mutex.h
 #include linux/pagemap.h
+#include linux/hashtable.h
 #include asm/ioctls.h
 #include linux/sunrpc/types.h
 #include linux/sunrpc/cache.h
@@ -524,19 +525,18 @@ EXPORT_SYMBOL_GPL(cache_purge);
  * it to be revisited when cache info is available
  */
 
-#defineDFR_HASHSIZE(PAGE_SIZE/sizeof(struct list_head))
-#defineDFR_HASH(item)  long)item)4 ^ (((long)item)13)) % 
DFR_HASHSIZE)
+#defineDFR_HASH_BITS   9
 
 #defineDFR_MAX 300 /* ??? */
 
 static DEFINE_SPINLOCK(cache_defer_lock);
 static LIST_HEAD(cache_defer_list);
-static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
+static DEFINE_HASHTABLE(cache_defer_hash, DFR_HASH_BITS);
 static int cache_defer_cnt;
 
 static void __unhash_deferred_req(struct cache_deferred_req *dreq)
 {
-   hlist_del_init(dreq-hash);
+   hash_del(dreq-hash);
if (!list_empty(dreq-recent)) {
list_del_init(dreq-recent);
cache_defer_cnt--;
@@ -545,10 +545,7 @@ static void __unhash_deferred_req(struct 
cache_deferred_req *dreq)
 
 static void __hash_deferred_req(struct cache_deferred_req *dreq, struct 
cache_head *item)
 {
-   int hash = DFR_HASH(item);
-
-   INIT_LIST_HEAD(dreq-recent);
-   hlist_add_head(dreq-hash, cache_defer_hash[hash]);
+   hash_add(cache_defer_hash, dreq-hash, (unsigned long)item);
 }
 
 static void setup_deferral(struct cache_deferred_req *dreq,
@@ -600,7 +597,7 @@ static void cache_wait_req(struct cache_req *req, struct 
cache_head *item)
 * to clean up
 */
spin_lock(cache_defer_lock);
-   if (!hlist_unhashed(sleeper.handle.hash)) {
+   if (hash_hashed(sleeper.handle.hash)) {
__unhash_deferred_req(sleeper.handle);
spin_unlock(cache_defer_lock);
} else {
@@ -671,12 +668,11 @@ static void cache_revisit_request(struct cache_head *item)
struct cache_deferred_req *dreq;
struct list_head pending;
struct hlist_node *lp, *tmp;
-   int hash = DFR_HASH(item);
 
INIT_LIST_HEAD(pending);
spin_lock(cache_defer_lock);
 
-   hlist_for_each_entry_safe(dreq, lp, tmp, cache_defer_hash[hash], hash)
+   hash_for_each_possible_safe(cache_defer_hash, dreq, lp, tmp, hash, 
(unsigned long)item)
if (dreq-item == item) {
__unhash_deferred_req(dreq);
list_add(dreq-recent, pending);
@@ -1636,6 +1632,8 @@ static int create_cache_proc_entries(struct cache_detail 
*cd, struct net *net)
 void __init cache_initialize(void)
 {
INIT_DEFERRABLE_WORK(cache_cleaner, do_cache_clean);
+
+   hash_init(cache_defer_hash);
 }
 
 int cache_register_net(struct cache_detail *cd, struct net *net)
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v7 11/16] net,l2tp: use new hashtable implementation

2012-10-28 Thread Sasha Levin

Switch l2tp to use the new hashtable implementation. This reduces the amount of
generic unrelated code in l2tp.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 net/l2tp/l2tp_core.c| 134 ++--
 net/l2tp/l2tp_core.h|   8 +--
 net/l2tp/l2tp_debugfs.c |  19 +++
 3 files changed, 61 insertions(+), 100 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1a9f372..77029b0 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -44,6 +44,7 @@
 #include linux/udp.h
 #include linux/l2tp.h
 #include linux/hash.h
+#include linux/hashtable.h
 #include linux/sort.h
 #include linux/file.h
 #include linux/nsproxy.h
@@ -107,8 +108,8 @@ static unsigned int l2tp_net_id;
 struct l2tp_net {
struct list_head l2tp_tunnel_list;
spinlock_t l2tp_tunnel_list_lock;
-   struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
-   spinlock_t l2tp_session_hlist_lock;
+   DECLARE_HASHTABLE(l2tp_session_hash, L2TP_HASH_BITS_2);
+   spinlock_t l2tp_session_hash_lock;
 };
 
 static void l2tp_session_set_header_len(struct l2tp_session *session, int 
version);
@@ -156,30 +157,17 @@ do {  
\
 #define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
 #endif
 
-/* Session hash global list for L2TPv3.
- * The session_id SHOULD be random according to RFC3931, but several
- * L2TP implementations use incrementing session_ids.  So we do a real
- * hash on the session_id, rather than a simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
-{
-   return pn-l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
-
-}
-
 /* Lookup a session by id in the global session list
  */
 static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 
session_id)
 {
struct l2tp_net *pn = l2tp_pernet(net);
-   struct hlist_head *session_list =
-   l2tp_session_id_hash_2(pn, session_id);
struct l2tp_session *session;
struct hlist_node *walk;
 
rcu_read_lock_bh();
-   hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) {
+   hash_for_each_possible_rcu(pn-l2tp_session_hash, session, walk,
+   global_hlist, session_id) {
if (session-session_id == session_id) {
rcu_read_unlock_bh();
return session;
@@ -190,23 +178,10 @@ static struct l2tp_session *l2tp_session_find_2(struct 
net *net, u32 session_id)
return NULL;
 }
 
-/* Session hash list.
- * The session_id SHOULD be random according to RFC2661, but several
- * L2TP implementations (Cisco and Microsoft) use incrementing
- * session_ids.  So we do a real hash on the session_id, rather than a
- * simple bitmask.
- */
-static inline struct hlist_head *
-l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
-{
-   return tunnel-session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
-}
-
 /* Lookup a session by id
  */
 struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel 
*tunnel, u32 session_id)
 {
-   struct hlist_head *session_list;
struct l2tp_session *session;
struct hlist_node *walk;
 
@@ -217,15 +192,14 @@ struct l2tp_session *l2tp_session_find(struct net *net, 
struct l2tp_tunnel *tunn
if (tunnel == NULL)
return l2tp_session_find_2(net, session_id);
 
-   session_list = l2tp_session_id_hash(tunnel, session_id);
-   read_lock_bh(tunnel-hlist_lock);
-   hlist_for_each_entry(session, walk, session_list, hlist) {
+   read_lock_bh(tunnel-hash_lock);
+   hash_for_each_possible(tunnel-session_hash, session, walk, hlist, 
session_id) {
if (session-session_id == session_id) {
-   read_unlock_bh(tunnel-hlist_lock);
+   read_unlock_bh(tunnel-hash_lock);
return session;
}
}
-   read_unlock_bh(tunnel-hlist_lock);
+   read_unlock_bh(tunnel-hash_lock);
 
return NULL;
 }
@@ -238,17 +212,15 @@ struct l2tp_session *l2tp_session_find_nth(struct 
l2tp_tunnel *tunnel, int nth)
struct l2tp_session *session;
int count = 0;
 
-   read_lock_bh(tunnel-hlist_lock);
-   for (hash = 0; hash  L2TP_HASH_SIZE; hash++) {
-   hlist_for_each_entry(session, walk, 
tunnel-session_hlist[hash], hlist) {
-   if (++count  nth) {
-   read_unlock_bh(tunnel-hlist_lock);
-   return session;
-   }
+   read_lock_bh(tunnel-hash_lock);
+   hash_for_each(tunnel-session_hash, hash, walk, session, hlist) {
+   if (++count  nth) {
+   read_unlock_bh(tunnel-hash_lock);
+   return session

[PATCH v7 13/16] lockd: use new hashtable implementation

2012-10-28 Thread Sasha Levin

Switch lockd to use the new hashtable implementation. This reduces the amount of
generic unrelated code in lockd.

Signed-off-by: Sasha Levin levinsasha...@gmail.com
---
 fs/lockd/svcsubs.c | 66 +-
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 0deb5f6..d223a1f 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -20,6 +20,7 @@
 #include linux/lockd/share.h
 #include linux/module.h
 #include linux/mount.h
+#include linux/hashtable.h
 
 #define NLMDBG_FACILITYNLMDBG_SVCSUBS
 
@@ -28,8 +29,7 @@
  * Global file hash table
  */
 #define FILE_HASH_BITS 7
-#define FILE_NRHASH(1FILE_HASH_BITS)
-static struct hlist_head   nlm_files[FILE_NRHASH];
+static DEFINE_HASHTABLE(nlm_files, FILE_HASH_BITS);
 static DEFINE_MUTEX(nlm_file_mutex);
 
 #ifdef NFSD_DEBUG
@@ -68,7 +68,7 @@ static inline unsigned int file_hash(struct nfs_fh *f)
int i;
for (i=0; iNFS2_FHSIZE;i++)
tmp += f-data[i];
-   return tmp  (FILE_NRHASH - 1);
+   return tmp;
 }
 
 /*
@@ -86,17 +86,17 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file 
**result,
 {
struct hlist_node *pos;
struct nlm_file *file;
-   unsigned inthash;
+   unsigned intkey;
__be32  nfserr;
 
nlm_debug_print_fh(nlm_lookup_file, f);
 
-   hash = file_hash(f);
+   key = file_hash(f);
 
/* Lock file table */
mutex_lock(nlm_file_mutex);
 
-   hlist_for_each_entry(file, pos, nlm_files[hash], f_list)
+   hash_for_each_possible(nlm_files, file, pos, f_list, file_hash(f))
if (!nfs_compare_fh(file-f_handle, f))
goto found;
 
@@ -123,7 +123,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file 
**result,
goto out_free;
}
 
-   hlist_add_head(file-f_list, nlm_files[hash]);
+   hash_add(nlm_files, file-f_list, key);
 
 found:
dprintk(lockd: found file %p (count %d)\n, file, file-f_count);
@@ -147,8 +147,8 @@ static inline void
 nlm_delete_file(struct nlm_file *file)
 {
nlm_debug_print_file(closing file, file);
-   if (!hlist_unhashed(file-f_list)) {
-   hlist_del(file-f_list);
+   if (hash_hashed(file-f_list)) {
+   hash_del(file-f_list);
nlmsvc_ops-fclose(file-f_file);
kfree(file);
} else {
@@ -253,27 +253,25 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match,
int i, ret = 0;
 
mutex_lock(nlm_file_mutex);
-   for (i = 0; i  FILE_NRHASH; i++) {
-   hlist_for_each_entry_safe(file, pos, next, nlm_files[i], 
f_list) {
-   if (is_failover_file  !is_failover_file(data, file))
-   continue;
-   file-f_count++;
-   mutex_unlock(nlm_file_mutex);
-
-   /* Traverse locks, blocks and shares of this file
-* and update file-f_locks count */
-   if (nlm_inspect_file(data, file, match))
-   ret = 1;
-
-   mutex_lock(nlm_file_mutex);
-   file-f_count--;
-   /* No more references to this file. Let go of it. */
-   if (list_empty(file-f_blocks)  !file-f_locks
- !file-f_shares  !file-f_count) {
-   hlist_del(file-f_list);
-   nlmsvc_ops-fclose(file-f_file);
-   kfree(file);
-   }
+   hash_for_each_safe(nlm_files, i, pos, next, file, f_list) {
+   if (is_failover_file  !is_failover_file(data, file))
+   continue;
+   file-f_count++;
+   mutex_unlock(nlm_file_mutex);
+
+   /* Traverse locks, blocks and shares of this file
+* and update file-f_locks count */
+   if (nlm_inspect_file(data, file, match))
+   ret = 1;
+
+   mutex_lock(nlm_file_mutex);
+   file-f_count--;
+   /* No more references to this file. Let go of it. */
+   if (list_empty(file-f_blocks)  !file-f_locks
+ !file-f_shares  !file-f_count) {
+   hash_del(file-f_list);
+   nlmsvc_ops-fclose(file-f_file);
+   kfree(file);
}
}
mutex_unlock(nlm_file_mutex);
@@ -451,3 +449,11 @@ nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr)
return ret ? -EIO : 0;
 }
 EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
+
+static int __init nlm_init(void)
+{
+   hash_init(nlm_files);
+   return 0;
+}
+
+module_init(nlm_init);
-- 
1.7.12.4

--
To unsubscribe from this list: send the line unsubscribe linux

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 43876 matches

Mail list logo