From: Yongliang Gao <[email protected]>

When the system has many cores and task switching is frequent, setting set_ftrace_pid can cause heavy pid_list->lock contention and high sys CPU usage.
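This patch converts pid_list->lock from a raw_spinlock_t to an rwlock_t so that the read-mostly fast path no longer serializes. To make the intent concrete, below is a minimal sketch of the before/after locking pattern; it is not the pid_list code itself, and the example_* names and the flat bitmap are made up for illustration. The irqsave variants mirror the patched functions, since the lookup can run in scheduling context:

#include <linux/spinlock.h>	/* rwlock_t, read/write lock primitives */
#include <linux/bitops.h>	/* test_bit(), set_bit(), BITS_TO_LONGS() */
#include <linux/threads.h>	/* PID_MAX_DEFAULT */
#include <linux/errno.h>	/* -EINVAL */

/* Hypothetical read-mostly pid bitmap, standing in for trace_pid_list. */
static DEFINE_RWLOCK(example_lock);
static unsigned long example_bits[BITS_TO_LONGS(PID_MAX_DEFAULT)];

/*
 * Hot path, called on every sched switch: with an rwlock, any number of
 * CPUs may hold the read lock at once, so lookups no longer serialize
 * against each other the way they do under a spinlock.
 */
static bool example_is_set(unsigned int pid)
{
	unsigned long flags;
	bool ret;

	read_lock_irqsave(&example_lock, flags);
	ret = test_bit(pid, example_bits);
	read_unlock_irqrestore(&example_lock, flags);
	return ret;
}

/*
 * Slow path, e.g. when a pid is written to set_ftrace_pid: writers take
 * the lock exclusively, excluding both readers and other writers.
 */
static int example_set(unsigned int pid)
{
	unsigned long flags;

	if (pid >= PID_MAX_DEFAULT)
		return -EINVAL;

	write_lock_irqsave(&example_lock, flags);
	set_bit(pid, example_bits);
	write_unlock_irqrestore(&example_lock, flags);
	return 0;
}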
For example, in a vmcore environment with 288 cores, we found 267 CPUs contending on pid_list->lock:

 #4 [ffffa6226fb4bc70] native_queued_spin_lock_slowpath at ffffffff99cd4b7e
 #5 [ffffa6226fb4bc90] _raw_spin_lock_irqsave at ffffffff99cd3e36
 #6 [ffffa6226fb4bca0] trace_pid_list_is_set at ffffffff99267554
 #7 [ffffa6226fb4bcc0] trace_ignore_this_task at ffffffff9925c288
 #8 [ffffa6226fb4bcd8] ftrace_filter_pid_sched_switch_probe at ffffffff99246efe
 #9 [ffffa6226fb4bcf0] __schedule at ffffffff99ccd161

Since trace_pid_list_is_set() only reads the pid_list, convert pid_list->lock from a raw_spinlock_t to an rwlock_t: readers on the sched_switch hot path can then run concurrently, while updates and the chunk-refill irq_work take the write lock.

Signed-off-by: Yongliang Gao <[email protected]>
Reviewed-by: Huang Cun <[email protected]>
---
 kernel/trace/pid_list.c | 26 +++++++++++++-------------
 kernel/trace/pid_list.h |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index 090bb5ea4a19..62082a4f60db 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -138,14 +138,14 @@ bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
 	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
 		return false;
 
-	raw_spin_lock_irqsave(&pid_list->lock, flags);
+	read_lock_irqsave(&pid_list->lock, flags);
 	upper_chunk = pid_list->upper[upper1];
 	if (upper_chunk) {
 		lower_chunk = upper_chunk->data[upper2];
 		if (lower_chunk)
 			ret = test_bit(lower, lower_chunk->data);
 	}
-	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
+	read_unlock_irqrestore(&pid_list->lock, flags);
 
 	return ret;
 }
@@ -177,7 +177,7 @@ int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
 	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
 		return -EINVAL;
 
-	raw_spin_lock_irqsave(&pid_list->lock, flags);
+	write_lock_irqsave(&pid_list->lock, flags);
 	upper_chunk = pid_list->upper[upper1];
 	if (!upper_chunk) {
 		upper_chunk = get_upper_chunk(pid_list);
@@ -199,7 +199,7 @@
 	set_bit(lower, lower_chunk->data);
 	ret = 0;
  out:
-	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
+	write_unlock_irqrestore(&pid_list->lock, flags);
 	return ret;
 }
 
@@ -229,7 +229,7 @@ int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
 	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
 		return -EINVAL;
 
-	raw_spin_lock_irqsave(&pid_list->lock, flags);
+	write_lock_irqsave(&pid_list->lock, flags);
 	upper_chunk = pid_list->upper[upper1];
 	if (!upper_chunk)
 		goto out;
@@ -250,7 +250,7 @@
 		}
 	}
  out:
-	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
+	write_unlock_irqrestore(&pid_list->lock, flags);
 
 	return 0;
 }
@@ -282,7 +282,7 @@ int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
 	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
 		return -EINVAL;
 
-	raw_spin_lock_irqsave(&pid_list->lock, flags);
+	read_lock_irqsave(&pid_list->lock, flags);
 
 	for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
 		upper_chunk = pid_list->upper[upper1];
@@ -302,7 +302,7 @@
 	}
 
  found:
-	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
+	read_unlock_irqrestore(&pid_list->lock, flags);
 
 	if (upper1 > UPPER_MASK)
 		return -1;
@@ -339,10 +339,10 @@ static void pid_list_refill_irq(struct irq_work *iwork)
 	int lcnt = 0;
 
  again:
-	raw_spin_lock(&pid_list->lock);
+	write_lock(&pid_list->lock);
 	upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks;
 	lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks;
-	raw_spin_unlock(&pid_list->lock);
+	write_unlock(&pid_list->lock);
 
 	if (upper_count <= 0 && lower_count <= 0)
 		return;
@@ -369,7 +369,7 @@ static void pid_list_refill_irq(struct irq_work *iwork)
 		lcnt++;
 	}
 
-	raw_spin_lock(&pid_list->lock);
+	write_lock(&pid_list->lock);
 	if (upper) {
 		*upper_next = pid_list->upper_list;
 		pid_list->upper_list = upper;
@@ -380,7 +380,7 @@
 		pid_list->lower_list = lower;
 		pid_list->free_lower_chunks += lcnt;
 	}
-	raw_spin_unlock(&pid_list->lock);
+	write_unlock(&pid_list->lock);
 
 	/*
 	 * On success of allocating all the chunks, both counters
@@ -418,7 +418,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
 
 	init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq);
 
-	raw_spin_lock_init(&pid_list->lock);
+	rwlock_init(&pid_list->lock);
 
 	for (i = 0; i < CHUNK_ALLOC; i++) {
 		union upper_chunk *chunk;
diff --git a/kernel/trace/pid_list.h b/kernel/trace/pid_list.h
index 62e73f1ac85f..da200834f4ad 100644
--- a/kernel/trace/pid_list.h
+++ b/kernel/trace/pid_list.h
@@ -76,7 +76,7 @@ union upper_chunk {
 };
 
 struct trace_pid_list {
-	raw_spinlock_t		lock;
+	rwlock_t		lock;
 	struct irq_work		refill_irqwork;
 	union upper_chunk	*upper[UPPER1_SIZE];	// 1 or 2K in size
 	union upper_chunk	*upper_list;
--
2.43.5
