On 16/01/2012 00:18 Andriy Gapon said the following:
> Author: avg
> Date: Sun Jan 15 22:18:54 2012
> New Revision: 230174
> URL: http://svn.freebsd.org/changeset/base/230174
> 
> Log:
>   MFC r228718: ule: ensure that batch timeshare threads are scheduled
>   fairly

Apologies, this commit is screwed up.

Here is what happened.  There was a merge conflict, and I initially chose the
postpone option.  I then edited the file to get a correct merge and ran
svn resolve.  It complained that it needed the --accept option, so I used
--accept=base.  I didn't run another svn diff after that before committing the
change, and that was my mistake.  Apparently svn resolve --accept=base changed
the file.

> Modified:
>   stable/8/sys/kern/sched_ule.c
> Directory Properties:
>   stable/8/sys/   (props changed)
>   stable/8/sys/amd64/include/xen/   (props changed)
>   stable/8/sys/cddl/contrib/opensolaris/   (props changed)
>   stable/8/sys/contrib/dev/acpica/   (props changed)
>   stable/8/sys/contrib/pf/   (props changed)
> 
> Modified: stable/8/sys/kern/sched_ule.c
> ==============================================================================
> --- stable/8/sys/kern/sched_ule.c     Sun Jan 15 22:10:35 2012        
> (r230173)
> +++ stable/8/sys/kern/sched_ule.c     Sun Jan 15 22:18:54 2012        
> (r230174)
> @@ -62,10 +62,6 @@ __FBSDID("$FreeBSD$");
>  #include <sys/vmmeter.h>
>  #include <sys/cpuset.h>
>  #include <sys/sbuf.h>
> -#ifdef KTRACE
> -#include <sys/uio.h>
> -#include <sys/ktrace.h>
> -#endif
>  
>  #ifdef HWPMC_HOOKS
>  #include <sys/pmckern.h>
> @@ -80,7 +76,7 @@ dtrace_vtime_switch_func_t  dtrace_vtime_
>  #include <machine/cpu.h>
>  #include <machine/smp.h>
>  
> -#if defined(__sparc64__)
> +#if defined(__powerpc__) && defined(E500)
>  #error "This architecture is not currently compatible with ULE"
>  #endif
>  
> @@ -88,7 +84,7 @@ dtrace_vtime_switch_func_t  dtrace_vtime_
>  
>  #define      TS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + 
> sizeof(__XSTRING(UINT_MAX)))
>  #define      TDQ_NAME_LEN    (sizeof("sched lock ") + 
> sizeof(__XSTRING(MAXCPU)))
> -#define      TDQ_LOADNAME_LEN        (PCPU_NAME_LEN + sizeof(" load"))
> +#define      TDQ_LOADNAME_LEN        (sizeof("CPU ") + 
> sizeof(__XSTRING(MAXCPU)) - 1 + sizeof(" load"))
>  
>  /*
>   * Thread scheduler specific section.  All fields are protected
> @@ -122,11 +118,17 @@ static struct td_sched td_sched0;
>  
>  /*
>   * Priority ranges used for interactive and non-interactive timeshare
> - * threads.  Interactive threads use realtime priorities.
> - */
> -#define      PRI_MIN_INTERACT        PRI_MIN_REALTIME
> -#define      PRI_MAX_INTERACT        PRI_MAX_REALTIME
> -#define      PRI_MIN_BATCH           PRI_MIN_TIMESHARE
> + * threads.  The timeshare priorities are split up into four ranges.
> + * The first range handles interactive threads.  The last three ranges
> + * (NHALF, x, and NHALF) handle non-interactive threads with the outer
> + * ranges supporting nice values.
> + */
> +#define      PRI_TIMESHARE_RANGE     (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE 
> + 1)
> +#define      PRI_INTERACT_RANGE      ((PRI_TIMESHARE_RANGE - 
> SCHED_PRI_NRESV) / 2)
> +
> +#define      PRI_MIN_INTERACT        PRI_MIN_TIMESHARE
> +#define      PRI_MAX_INTERACT        (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE 
> - 1)
> +#define      PRI_MIN_BATCH           (PRI_MIN_TIMESHARE + PRI_INTERACT_RANGE)
>  #define      PRI_MAX_BATCH           PRI_MAX_TIMESHARE
>  
>  /*
> @@ -209,7 +211,7 @@ static int preempt_thresh = 0;
>  #endif
>  static int static_boost = PRI_MIN_BATCH;
>  static int sched_idlespins = 10000;
> -static int sched_idlespinthresh = 4;
> +static int sched_idlespinthresh = 16;
>  
>  /*
>   * tdq - per processor runqs and statistics.  All fields are protected by the
> @@ -221,6 +223,7 @@ struct tdq {
>       struct mtx      tdq_lock;               /* run queue lock. */
>       struct cpu_group *tdq_cg;               /* Pointer to cpu topology. */
>       volatile int    tdq_load;               /* Aggregate load. */
> +     volatile int    tdq_cpu_idle;           /* cpu_idle() is active. */
>       int             tdq_sysload;            /* For loadavg, !ITHD load. */
>       int             tdq_transferable;       /* Transferable thread count. */
>       short           tdq_switchcnt;          /* Switches this tick. */
> @@ -561,7 +564,7 @@ struct cpu_search {
>  
>  #define      CPUSET_FOREACH(cpu, mask)                               \
>       for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++)             \
> -             if ((mask) & 1 << (cpu))
> +             if (CPU_ISSET(cpu, &mask))
>  
>  static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low,
>      struct cpu_search *high, const int match);
> @@ -836,6 +839,7 @@ sched_balance_pair(struct tdq *high, str
>       int low_load;
>       int moved;
>       int move;
> +     int cpu;
>       int diff;
>       int i;
>  
> @@ -857,10 +861,14 @@ sched_balance_pair(struct tdq *high, str
>               for (i = 0; i < move; i++)
>                       moved += tdq_move(high, low);
>               /*
> -              * IPI the target cpu to force it to reschedule with the new
> -              * workload.
> +              * In case the target isn't the current cpu IPI it to force a
> +              * reschedule with the new workload.
>                */
> -             ipi_cpu(TDQ_ID(low), IPI_PREEMPT);
> +             cpu = TDQ_ID(low);
> +             sched_pin();
> +             if (cpu != PCPU_GET(cpuid))
> +                     ipi_cpu(cpu, IPI_PREEMPT);
> +             sched_unpin();
>       }
>       tdq_unlock_pair(high, low);
>       return (moved);
> @@ -979,7 +987,7 @@ tdq_notify(struct tdq *tdq, struct threa
>                * If the MD code has an idle wakeup routine try that before
>                * falling back to IPI.
>                */
> -             if (cpu_idle_wakeup(cpu))
> +             if (!tdq->tdq_cpu_idle || cpu_idle_wakeup(cpu))
>                       return;
>       }
>       tdq->tdq_ipipending = 1;
> @@ -1426,8 +1434,7 @@ sched_priority(struct thread *td)
>       } else {
>               pri = SCHED_PRI_MIN;
>               if (td->td_sched->ts_ticks)
> -                     pri += min(SCHED_PRI_TICKS(td->td_sched),
> -                         SCHED_PRI_RANGE);
> +                     pri += SCHED_PRI_TICKS(td->td_sched);
>               pri += SCHED_PRI_NICE(td->td_proc->p_nice);
>               KASSERT(pri >= PRI_MIN_BATCH && pri <= PRI_MAX_BATCH,
>                   ("sched_priority: invalid priority %d: nice %d, " 
> @@ -1688,39 +1695,24 @@ sched_prio(struct thread *td, u_char pri
>  void
>  sched_user_prio(struct thread *td, u_char prio)
>  {
> -     u_char oldprio;
>  
>       td->td_base_user_pri = prio;
> -     if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
> -                return;
> -     oldprio = td->td_user_pri;
> +     if (td->td_lend_user_pri <= prio)
> +             return;
>       td->td_user_pri = prio;
>  }
>  
>  void
>  sched_lend_user_prio(struct thread *td, u_char prio)
>  {
> -     u_char oldprio;
>  
>       THREAD_LOCK_ASSERT(td, MA_OWNED);
> -     td->td_flags |= TDF_UBORROWING;
> -     oldprio = td->td_user_pri;
> -     td->td_user_pri = prio;
> -}
> -
> -void
> -sched_unlend_user_prio(struct thread *td, u_char prio)
> -{
> -     u_char base_pri;
> -
> -     THREAD_LOCK_ASSERT(td, MA_OWNED);
> -     base_pri = td->td_base_user_pri;
> -     if (prio >= base_pri) {
> -             td->td_flags &= ~TDF_UBORROWING;
> -             sched_user_prio(td, base_pri);
> -     } else {
> -             sched_lend_user_prio(td, prio);
> -     }
> +     td->td_lend_user_pri = prio;
> +     td->td_user_pri = min(prio, td->td_base_user_pri);
> +     if (td->td_priority > td->td_user_pri)
> +             sched_prio(td, td->td_user_pri);
> +     else if (td->td_priority != td->td_user_pri)
> +             td->td_flags |= TDF_NEEDRESCHED;
>  }
>  
>  /*
> @@ -1913,6 +1905,8 @@ sched_sleep(struct thread *td, int prio)
>       td->td_slptick = ticks;
>       if (TD_IS_SUSPENDED(td) || prio >= PSOCK)
>               td->td_flags |= TDF_CANSWAP;
> +     if (PRI_BASE(td->td_pri_class) != PRI_TIMESHARE)
> +             return;
>       if (static_boost == 1 && prio)
>               sched_prio(td, prio);
>       else if (static_boost && td->td_priority > static_boost)
> @@ -2179,7 +2173,7 @@ sched_clock(struct thread *td)
>   * is easier than trying to scale based on stathz.
>   */
>  void
> -sched_tick(void)
> +sched_tick(int cnt)
>  {
>       struct td_sched *ts;
>  
> @@ -2191,7 +2185,7 @@ sched_tick(void)
>       if (ts->ts_incrtick == ticks)
>               return;
>       /* Adjust ticks for pctcpu */
> -     ts->ts_ticks += 1 << SCHED_TICK_SHIFT;
> +     ts->ts_ticks += cnt << SCHED_TICK_SHIFT;
>       ts->ts_ltick = ticks;
>       ts->ts_incrtick = ticks;
>       /*
> @@ -2562,8 +2556,14 @@ sched_idletd(void *dummy)
>                       }
>               }
>               switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
> -             if (tdq->tdq_load == 0)
> -                     cpu_idle(switchcnt > 1);
> +             if (tdq->tdq_load == 0) {
> +                     tdq->tdq_cpu_idle = 1;
> +                     if (tdq->tdq_load == 0) {
> +                             cpu_idle(switchcnt > sched_idlespinthresh * 4);
> +                             tdq->tdq_switchcnt++;
> +                     }
> +                     tdq->tdq_cpu_idle = 0;
> +             }
>               if (tdq->tdq_load) {
>                       thread_lock(td);
>                       mi_switch(SW_VOL | SWT_IDLE, NULL);
> @@ -2586,8 +2586,6 @@ sched_throw(struct thread *td)
>               /* Correct spinlock nesting and acquire the correct lock. */
>               TDQ_LOCK(tdq);
>               spinlock_exit();
> -             PCPU_SET(switchtime, cpu_ticks());
> -             PCPU_SET(switchticks, ticks);
>       } else {
>               MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
>               tdq_load_rem(tdq, td);
> @@ -2596,6 +2594,8 @@ sched_throw(struct thread *td)
>       KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
>       newtd = choosethread();
>       TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
> +     PCPU_SET(switchtime, cpu_ticks());
> +     PCPU_SET(switchticks, ticks);
>       cpu_throw(td, newtd);           /* doesn't return */
>  }
>  
> @@ -2655,15 +2655,16 @@ static int
>  sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group 
> *cg,
>      int indent)
>  {
> +     char cpusetbuf[CPUSETBUFSIZ];
>       int i, first;
>  
>       sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
>           "", 1 + indent / 2, cg->cg_level);
> -     sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"0x%x\">", indent, "",
> -         cg->cg_count, cg->cg_mask);
> +     sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
> +         cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
>       first = TRUE;
>       for (i = 0; i < MAXCPU; i++) {
> -             if ((cg->cg_mask & (1 << i)) != 0) {
> +             if (CPU_ISSET(i, &cg->cg_mask)) {
>                       if (!first)
>                               sbuf_printf(sb, ", ");
>                       else
> @@ -2722,6 +2723,7 @@ sysctl_kern_sched_topology_spec(SYSCTL_H
>       sbuf_delete(topo);
>       return (err);
>  }
> +
>  #endif
>  
>  SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
> @@ -2758,6 +2760,7 @@ SYSCTL_INT(_kern_sched, OID_AUTO, steal_
>  SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
>      CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A", 
>      "XML dump of detected CPU topology");
> +
>  #endif
>  
>  /* ps compat.  All cpu percentages from ULE are weighted. */


-- 
Andriy Gapon
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to