ChangeSet 1.2065.3.23, 2005/03/12 08:26:51-08:00, [EMAIL PROTECTED]

        [PATCH] sched: rework schedstats
        
        Move balancing fields into struct sched_domain, so we can get more
        useful results on systems with multiple domains (e.g. SMT+SMP,
        CMP+NUMA, SMP+NUMA, etc).
        
        Signed-off-by: Nick Piggin <[EMAIL PROTECTED]>
        Acked-by: Ingo Molnar <[EMAIL PROTECTED]>
        Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
        Signed-off-by: Linus Torvalds <[EMAIL PROTECTED]>



 include/linux/sched.h |   13 ++++-
 kernel/sched.c        |  112 +++++++++++++++++++++-----------------------------
 2 files changed, 60 insertions(+), 65 deletions(-)
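
For context on the mechanics: schedstat_inc()/schedstat_add() are plain
field-increment macros (a paraphrased sketch of kernel/sched.c from this era,
not part of this patch), which is why the same helpers work unchanged whether
the counter lives in the runqueue or, as after this patch, in struct
sched_domain:

#ifdef CONFIG_SCHEDSTATS
/* First argument may be any pointer whose struct has the named field. */
# define schedstat_inc(rq, field)       do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)  do { (rq)->field += (amt); } while (0)
#else
/* Compile away entirely when schedstats are disabled. */
# define schedstat_inc(rq, field)       do { } while (0)
# define schedstat_add(rq, field, amt)  do { } while (0)
#endif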


diff -Nru a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h     2005-03-12 21:29:30 -08:00
+++ b/include/linux/sched.h     2005-03-12 21:29:30 -08:00
@@ -489,17 +489,26 @@
        /* load_balance() stats */
        unsigned long lb_cnt[MAX_IDLE_TYPES];
        unsigned long lb_failed[MAX_IDLE_TYPES];
+       unsigned long lb_balanced[MAX_IDLE_TYPES];
        unsigned long lb_imbalance[MAX_IDLE_TYPES];
+       unsigned long lb_gained[MAX_IDLE_TYPES];
+       unsigned long lb_hot_gained[MAX_IDLE_TYPES];
        unsigned long lb_nobusyg[MAX_IDLE_TYPES];
        unsigned long lb_nobusyq[MAX_IDLE_TYPES];
 
+       /* Active load balancing */
+       unsigned long alb_cnt;
+       unsigned long alb_failed;
+       unsigned long alb_pushed;
+
        /* sched_balance_exec() stats */
        unsigned long sbe_attempts;
        unsigned long sbe_pushed;
 
        /* try_to_wake_up() stats */
-       unsigned long ttwu_wake_affine;
-       unsigned long ttwu_wake_balance;
+       unsigned long ttwu_wake_remote;
+       unsigned long ttwu_move_affine;
+       unsigned long ttwu_move_balance;
 #endif
 };
 
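To make the new output format concrete before the kernel/sched.c changes: a
hypothetical user-space sketch (not part of this patch) that pulls the
per-idle-type fields out of a version-11 "domainN" line. The field order
follows show_schedstat() below; MAX_IDLE_TYPES and all other names here are
assumptions based on the 2.6 scheduler of this period.

#include <stdio.h>
#include <stdlib.h>

#define MAX_IDLE_TYPES 3        /* SCHED_IDLE, NOT_IDLE, NEWLY_IDLE (assumed) */

int main(void)
{
        char line[1024], mask[64];
        FILE *fp = fopen("/proc/schedstat", "r");

        if (!fp)
                return 1;
        while (fgets(line, sizeof(line), fp)) {
                int d, itype, i, off = 0;
                unsigned long f[8];
                char *p;

                if (sscanf(line, "domain%d %63s%n", &d, mask, &off) != 2)
                        continue;
                p = line + off;
                for (itype = 0; itype < MAX_IDLE_TYPES; itype++) {
                        /* Eight fields per idle type, in seq_printf() order:
                         * cnt balanced failed imbalance gained hot_gained
                         * nobusyq nobusyg */
                        for (i = 0; i < 8; i++)
                                f[i] = strtoul(p, &p, 10);
                        printf("domain%d itype%d: cnt=%lu balanced=%lu\n",
                               d, itype, f[0], f[1]);
                }
        }
        fclose(fp);
        return 0;
}
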
diff -Nru a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c    2005-03-12 21:29:30 -08:00
+++ b/kernel/sched.c    2005-03-12 21:29:30 -08:00
@@ -248,35 +248,13 @@
        unsigned long yld_cnt;
 
        /* schedule() stats */
-       unsigned long sched_noswitch;
        unsigned long sched_switch;
        unsigned long sched_cnt;
        unsigned long sched_goidle;
 
-       /* pull_task() stats */
-       unsigned long pt_gained[MAX_IDLE_TYPES];
-       unsigned long pt_lost[MAX_IDLE_TYPES];
-
-       /* active_load_balance() stats */
-       unsigned long alb_cnt;
-       unsigned long alb_lost;
-       unsigned long alb_gained;
-       unsigned long alb_failed;
-
        /* try_to_wake_up() stats */
        unsigned long ttwu_cnt;
-       unsigned long ttwu_attempts;
-       unsigned long ttwu_moved;
-
-       /* wake_up_new_task() stats */
-       unsigned long wunt_cnt;
-       unsigned long wunt_moved;
-
-       /* sched_migrate_task() stats */
-       unsigned long smt_cnt;
-
-       /* sched_balance_exec() stats */
-       unsigned long sbe_cnt;
+       unsigned long ttwu_local;
 #endif
 };
 
@@ -331,7 +309,7 @@
  * bump this up when changing the output format or the meaning of an existing
  * format, so that tools can adapt (or abort)
  */
-#define SCHEDSTAT_VERSION 10
+#define SCHEDSTAT_VERSION 11
 
 static int show_schedstat(struct seq_file *seq, void *v)
 {
@@ -349,22 +327,14 @@
 
                /* runqueue-specific stats */
                seq_printf(seq,
-                   "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu "
-                   "%lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+                   "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                    cpu, rq->yld_both_empty,
-                   rq->yld_act_empty, rq->yld_exp_empty,
-                   rq->yld_cnt, rq->sched_noswitch,
+                   rq->yld_act_empty, rq->yld_exp_empty, rq->yld_cnt,
                    rq->sched_switch, rq->sched_cnt, rq->sched_goidle,
-                   rq->alb_cnt, rq->alb_gained, rq->alb_lost,
-                   rq->alb_failed,
-                   rq->ttwu_cnt, rq->ttwu_moved, rq->ttwu_attempts,
-                   rq->wunt_cnt, rq->wunt_moved,
-                   rq->smt_cnt, rq->sbe_cnt, rq->rq_sched_info.cpu_time,
+                   rq->ttwu_cnt, rq->ttwu_local,
+                   rq->rq_sched_info.cpu_time,
                    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcnt);
 
-               for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; itype++)
-                       seq_printf(seq, " %lu %lu", rq->pt_gained[itype],
-                                                   rq->pt_lost[itype]);
                seq_printf(seq, "\n");
 
 #ifdef CONFIG_SMP
@@ -375,17 +345,21 @@
                        cpumask_scnprintf(mask_str, NR_CPUS, sd->span);
                        seq_printf(seq, "domain%d %s", dcnt++, mask_str);
                        for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES;
-                                               itype++) {
-                               seq_printf(seq, " %lu %lu %lu %lu %lu",
+                                       itype++) {
+                               seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu",
                                    sd->lb_cnt[itype],
+                                   sd->lb_balanced[itype],
                                    sd->lb_failed[itype],
                                    sd->lb_imbalance[itype],
+                                   sd->lb_gained[itype],
+                                   sd->lb_hot_gained[itype],
                                    sd->lb_nobusyq[itype],
                                    sd->lb_nobusyg[itype]);
                        }
-                       seq_printf(seq, " %lu %lu %lu %lu\n",
+                       seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu\n",
+                           sd->alb_cnt, sd->alb_failed, sd->alb_pushed,
                            sd->sbe_pushed, sd->sbe_attempts,
-                           sd->ttwu_wake_affine, sd->ttwu_wake_balance);
+                           sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance);
                }
 #endif
        }
@@ -998,7 +972,6 @@
 #endif
 
        rq = task_rq_lock(p, &flags);
-       schedstat_inc(rq, ttwu_cnt);
        old_state = p->state;
        if (!(old_state & state))
                goto out;
@@ -1013,8 +986,21 @@
        if (unlikely(task_running(rq, p)))
                goto out_activate;
 
-       new_cpu = cpu;
+#ifdef CONFIG_SCHEDSTATS
+       schedstat_inc(rq, ttwu_cnt);
+       if (cpu == this_cpu) {
+               schedstat_inc(rq, ttwu_local);
+       } else {
+               for_each_domain(this_cpu, sd) {
+                       if (cpu_isset(cpu, sd->span)) {
+                               schedstat_inc(sd, ttwu_wake_remote);
+                               break;
+                       }
+               }
+       }
+#endif
 
+       new_cpu = cpu;
        if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
                goto out_set_cpu;
 
@@ -1053,7 +1039,7 @@
                         * in this domain.
                         */
                        if (cpu_isset(cpu, sd->span)) {
-                               schedstat_inc(sd, ttwu_wake_affine);
+                               schedstat_inc(sd, ttwu_move_affine);
                                goto out_set_cpu;
                        }
                } else if ((sd->flags & SD_WAKE_BALANCE) &&
@@ -1063,7 +1049,7 @@
                         * an imbalance.
                         */
                        if (cpu_isset(cpu, sd->span)) {
-                               schedstat_inc(sd, ttwu_wake_balance);
+                               schedstat_inc(sd, ttwu_move_balance);
                                goto out_set_cpu;
                        }
                }
@@ -1071,10 +1057,8 @@
 
        new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
 out_set_cpu:
-       schedstat_inc(rq, ttwu_attempts);
        new_cpu = wake_idle(new_cpu, p);
        if (new_cpu != cpu) {
-               schedstat_inc(rq, ttwu_moved);
                set_task_cpu(p, new_cpu);
                task_rq_unlock(rq, &flags);
                /* might preempt at this point */
@@ -1217,7 +1201,6 @@
 
        BUG_ON(p->state != TASK_RUNNING);
 
-       schedstat_inc(rq, wunt_cnt);
        /*
         * We decrease the sleep average of forking parents
         * and children as well, to keep max-interactive tasks
@@ -1269,7 +1252,6 @@
                if (TASK_PREEMPTS_CURR(p, rq))
                        resched_task(rq->curr);
 
-               schedstat_inc(rq, wunt_moved);
                /*
                 * Parent and child are on different CPUs, now get the
                 * parent runqueue to update the parent's ->sleep_avg:
@@ -1573,7 +1555,6 @@
            || unlikely(cpu_is_offline(dest_cpu)))
                goto out;
 
-       schedstat_inc(rq, smt_cnt);
        /* force the process onto the specified CPU */
        if (migrate_task(p, dest_cpu, &req)) {
                /* Need to wait for migration thread (might exit: take ref). */
@@ -1601,7 +1582,6 @@
        struct sched_domain *tmp, *sd = NULL;
        int new_cpu, this_cpu = get_cpu();
 
-       schedstat_inc(this_rq(), sbe_cnt);
        /* Prefer the current CPU if there's only this task running */
        if (this_rq()->nr_running <= 1)
                goto out;
@@ -1744,13 +1724,10 @@
                goto skip_bitmap;
        }
 
-       /*
-        * Right now, this is the only place pull_task() is called,
-        * so we can safely collect pull_task() stats here rather than
-        * inside pull_task().
-        */
-       schedstat_inc(this_rq, pt_gained[idle]);
-       schedstat_inc(busiest, pt_lost[idle]);
+#ifdef CONFIG_SCHEDSTATS
+       if (task_hot(tmp, busiest->timestamp_last_tick, sd))
+               schedstat_inc(sd, lb_hot_gained[idle]);
+#endif
 
        pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu);
        pulled++;
@@ -1763,6 +1740,12 @@
                goto skip_bitmap;
        }
 out:
+       /*
+        * Right now, this is the only place pull_task() is called,
+        * so we can safely collect pull_task() stats here rather than
+        * inside pull_task().
+        */
+       schedstat_add(sd, lb_gained[idle], pulled);
        return pulled;
 }
 
@@ -2023,6 +2006,8 @@
 out_balanced:
        spin_unlock(&this_rq->lock);
 
+       schedstat_inc(sd, lb_balanced[idle]);
+
        /* tune up the balancing interval */
        if (sd->balance_interval < sd->max_interval)
                sd->balance_interval *= 2;
@@ -2048,12 +2033,14 @@
        schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
        group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE);
        if (!group) {
+               schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
                schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]);
                goto out;
        }
 
        busiest = find_busiest_queue(group);
        if (!busiest || busiest == this_rq) {
+               schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
                schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
                goto out;
        }
@@ -2107,7 +2094,6 @@
        cpumask_t visited_cpus;
        int cpu;
 
-       schedstat_inc(busiest_rq, alb_cnt);
        /*
         * Search for suitable CPUs to push tasks to in successively higher
         * domains with SD_LOAD_BALANCE set.
@@ -2118,6 +2104,8 @@
                        /* no more domains to search */
                        break;
 
+               schedstat_inc(sd, alb_cnt);
+
                cpu_group = sd->groups;
                do {
                        for_each_cpu_mask(cpu, cpu_group->cpumask) {
@@ -2142,10 +2130,9 @@
                                double_lock_balance(busiest_rq, target_rq);
                                if (move_tasks(target_rq, cpu, busiest_rq,
                                                1, sd, SCHED_IDLE)) {
-                                       schedstat_inc(busiest_rq, alb_lost);
-                                       schedstat_inc(target_rq, alb_gained);
+                                       schedstat_inc(sd, alb_pushed);
                                } else {
-                                       schedstat_inc(busiest_rq, alb_failed);
+                                       schedstat_inc(sd, alb_failed);
                                }
                                spin_unlock(&target_rq->lock);
                        }
@@ -2736,8 +2723,7 @@
                array = rq->active;
                rq->expired_timestamp = 0;
                rq->best_expired_prio = MAX_PRIO;
-       } else
-               schedstat_inc(rq, sched_noswitch);
+       }
 
        idx = sched_find_first_bit(array->bitmap);
        queue = array->queue + idx;
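
One usage note on the new counters: because load_balance() now records
"found nothing to do" (lb_balanced) separately from failures, a per-domain,
per-idle-type success picture falls out of simple arithmetic. A hypothetical
helper, building on the parsing sketch above:

/* Fraction of load_balance() runs that found the domain already balanced;
 * lb_cnt and lb_balanced as parsed from a version-11 domain line. */
static double lb_balanced_ratio(unsigned long lb_cnt, unsigned long lb_balanced)
{
        return lb_cnt ? (double)lb_balanced / (double)lb_cnt : 0.0;
}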