commit:     aa99b5941813b8267a0dee6085aadda60385aede
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Thu Feb 9 12:39:52 2023 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Thu Feb 9 12:39:52 2023 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=aa99b594
Bump BMQ Patch to 6.1-r4

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README                                        |   2 +-
 ... => 5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch | 216 ++++++++++++++-------
 2 files changed, 148 insertions(+), 70 deletions(-)

diff --git a/0000_README b/0000_README
index 1fb22543..62ade3e6 100644
--- a/0000_README
+++ b/0000_README
@@ -139,6 +139,6 @@ Patch:  5010_enable-cpu-optimizations-universal.patch
 From:   https://github.com/graysky2/kernel_compiler_patch
 Desc:   Kernel >= 5.15 patch enables gcc = v11.1+ optimizations for additional CPUs.
 
-Patch:  5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch
+Patch:  5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
 From:   https://gitlab.com/alfredchen/projectc
 Desc:   BMQ(BitMap Queue) Scheduler. A new CPU scheduler developed from PDS(incld). Inspired by the scheduler in zircon.
diff --git a/5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch b/5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
similarity index 98%
rename from 5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch
rename to 5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
index 783f3bca..7c2a77d3 100644
--- a/5020_BMQ-and-PDS-io-scheduler-v6.1-r0.patch
+++ b/5020_BMQ-and-PDS-io-scheduler-v6.1-r4.patch
@@ -369,7 +369,7 @@ index 94125d3b6893..c87ba766d354 100644
 +menuconfig SCHED_ALT
 +	bool "Alternative CPU Schedulers"
-+	default n
++	default y
 +	help
 +	  This feature enable alternative CPU scheduler"
 +
@@ -632,10 +632,10 @@ index 976092b7bd45..31d587c16ec1 100644
 obj-y += build_utility.o
 diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c
 new file mode 100644
-index 000000000000..4bea0c025475
+index 000000000000..a9e906b229eb
 --- /dev/null
 +++ b/kernel/sched/alt_core.c
-@@ -0,0 +1,7912 @@
+@@ -0,0 +1,7982 @@
 +/*
 + * kernel/sched/alt_core.c
 + *
@@ -665,7 +665,6 @@ index 000000000000..4bea0c025475
 +#include <linux/init_task.h>
 +#include <linux/kcov.h>
 +#include <linux/kprobes.h>
-+#include <linux/profile.h>
 +#include <linux/nmi.h>
 +#include <linux/scs.h>
 +
@@ -706,7 +705,7 @@ index 000000000000..4bea0c025475
 +#define sched_feat(x)	(0)
 +#endif /* CONFIG_SCHED_DEBUG */
 +
-+#define ALT_SCHED_VERSION "v6.1-r0"
++#define ALT_SCHED_VERSION "v6.1-r4"
 +
 +/* rt_prio(prio) defined in include/linux/sched/rt.h */
 +#define rt_task(p)	rt_prio((p)->prio)
@@ -786,7 +785,8 @@ index 000000000000..4bea0c025475
 +#ifdef CONFIG_SCHED_SMT
 +static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp;
 +#endif
-+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
++static cpumask_t sched_preempt_mask[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp;
++static cpumask_t *const sched_idle_mask = &sched_preempt_mask[0];
 +
 +/* sched_queue related functions */
 +static inline void sched_queue_init(struct sched_queue *q)
@@ -810,44 +810,66 @@ index 000000000000..4bea0c025475
 +	list_add(&idle->sq_node, &q->heads[idle->sq_idx]);
 +}
 +
++static inline void
++clear_recorded_preempt_mask(int pr, int low, int high, int cpu)
++{
++	if (low < pr && pr <= high)
++		cpumask_clear_cpu(cpu, sched_preempt_mask + SCHED_QUEUE_BITS - pr);
++}
++
++static inline void
++set_recorded_preempt_mask(int pr, int low, int high, int cpu)
++{
++	if (low < pr && pr <= high)
++		cpumask_set_cpu(cpu, sched_preempt_mask + SCHED_QUEUE_BITS - pr);
++}
++
++static atomic_t sched_prio_record = ATOMIC_INIT(0);
++
 +/* water mark related functions */
-+static inline void update_sched_rq_watermark(struct rq *rq)
++static inline void update_sched_preempt_mask(struct rq *rq)
 +{
-+	unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
-+	unsigned long last_wm = rq->watermark;
-+	unsigned long i;
-+	int cpu;
++	unsigned long prio = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS);
++	unsigned long last_prio = rq->prio;
++	int cpu, pr;
 +
-+	if (watermark == last_wm)
++	if (prio == last_prio)
 +		return;
 +
-+	rq->watermark = watermark;
++	rq->prio = prio;
 +	cpu = cpu_of(rq);
-+	if (watermark < last_wm) {
-+		for (i = last_wm; i > watermark; i--)
-+			cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++	pr = atomic_read(&sched_prio_record);
++
++	if (prio < last_prio) {
++		if (IDLE_TASK_SCHED_PRIO == last_prio) {
++			cpumask_clear_cpu(cpu, sched_idle_mask);
++			last_prio -= 2;
 +#ifdef CONFIG_SCHED_SMT
-+		if (static_branch_likely(&sched_smt_present) &&
-+		    IDLE_TASK_SCHED_PRIO == last_wm)
-+			cpumask_andnot(&sched_sg_idle_mask,
-+				       &sched_sg_idle_mask, cpu_smt_mask(cpu));
++			if (static_branch_likely(&sched_smt_present))
++				cpumask_andnot(&sched_sg_idle_mask,
++					       &sched_sg_idle_mask, cpu_smt_mask(cpu));
 +#endif
++		}
++		clear_recorded_preempt_mask(pr, prio, last_prio, cpu);
++
 +		return;
 +	}
-+	/* last_wm < watermark */
-+	for (i = watermark; i > last_wm; i--)
-+		cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i);
++	/* last_prio < prio */
++	if (IDLE_TASK_SCHED_PRIO == prio) {
++		cpumask_set_cpu(cpu, sched_idle_mask);
++		prio -= 2;
 +#ifdef CONFIG_SCHED_SMT
-+	if (static_branch_likely(&sched_smt_present) &&
-+	    IDLE_TASK_SCHED_PRIO == watermark) {
-+		cpumask_t tmp;
++	if (static_branch_likely(&sched_smt_present)) {
++		cpumask_t tmp;
 +
-+		cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark);
++		cpumask_and(&tmp, cpu_smt_mask(cpu), sched_idle_mask);
 +		if (cpumask_equal(&tmp, cpu_smt_mask(cpu)))
-+			cpumask_or(&sched_sg_idle_mask,
-+				   &sched_sg_idle_mask, cpu_smt_mask(cpu));
-+	}
++			cpumask_or(&sched_sg_idle_mask,
++				   &sched_sg_idle_mask, cpu_smt_mask(cpu));
++		}
 +#endif
++	}
++	set_recorded_preempt_mask(pr, last_prio, prio, cpu);
 +}
 +
 +/*
@@ -1369,8 +1391,8 @@ index 000000000000..4bea0c025475
 + * Context: rq->lock
 + */
 +#define __SCHED_DEQUEUE_TASK(p, rq, flags)	\
-+	psi_dequeue(p, flags & DEQUEUE_SLEEP);	\
 +	sched_info_dequeue(rq, p);	\
++	psi_dequeue(p, flags & DEQUEUE_SLEEP);	\
 +	\
 +	list_del(&p->sq_node);	\
 +	if (list_empty(&rq->queue.heads[p->sq_idx])) \
@@ -1378,7 +1400,7 @@ index 000000000000..4bea0c025475
 +
 +#define __SCHED_ENQUEUE_TASK(p, rq, flags)	\
 +	sched_info_enqueue(rq, p);	\
-+	psi_enqueue(p, flags);	\
++	psi_enqueue(p, flags & ENQUEUE_WAKEUP);	\
 +	\
 +	p->sq_idx = task_sched_prio_idx(p, rq);	\
 +	list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \
@@ -1411,7 +1433,7 @@ index 000000000000..4bea0c025475
 +		  task_cpu(p), cpu_of(rq));
 +
 +	__SCHED_ENQUEUE_TASK(p, rq, flags);
-+	update_sched_rq_watermark(rq);
++	update_sched_preempt_mask(rq);
 +	++rq->nr_running;
 +#ifdef CONFIG_SMP
 +	if (2 == rq->nr_running)
@@ -1436,7 +1458,7 @@ index 000000000000..4bea0c025475
 +			  rq->queue.bitmap);
 +		p->sq_idx = idx;
 +		set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap);
-+		update_sched_rq_watermark(rq);
++		update_sched_preempt_mask(rq);
 +	}
 +}
 +
@@ -2007,11 +2029,13 @@ index 000000000000..4bea0c025475
 +
 +	WARN_ON_ONCE(is_migration_disabled(p));
 +#endif
-+	if (task_cpu(p) == new_cpu)
-+		return;
 +	trace_sched_migrate_task(p, new_cpu);
-+	rseq_migrate(p);
-+	perf_event_task_migrate(p);
++
++	if (task_cpu(p) != new_cpu)
++	{
++		rseq_migrate(p);
++		perf_event_task_migrate(p);
++	}
 +
 +	__set_task_cpu(p, new_cpu);
 +}
@@ -2163,7 +2187,7 @@ index 000000000000..4bea0c025475
 +
 +	WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
 +	dequeue_task(p, rq, 0);
-+	update_sched_rq_watermark(rq);
++	update_sched_preempt_mask(rq);
 +	set_task_cpu(p, new_cpu);
 +	raw_spin_unlock(&rq->lock);
 +
@@ -2525,23 +2549,50 @@ index 000000000000..4bea0c025475
 +	return dest_cpu;
 +}
 +
++static inline void
++sched_preempt_mask_flush(cpumask_t *mask, int prio)
++{
++	int cpu;
++
++	cpumask_copy(mask, sched_idle_mask);
++
++	for_each_cpu_not(cpu, mask) {
++		if (prio < cpu_rq(cpu)->prio)
++			cpumask_set_cpu(cpu, mask);
++	}
++}
++
++static inline int
++preempt_mask_check(struct task_struct *p, cpumask_t *allow_mask, cpumask_t *preempt_mask)
++{
++	int task_prio = task_sched_prio(p);
++	cpumask_t *mask = sched_preempt_mask + SCHED_QUEUE_BITS - 1 - task_prio;
++	int pr = atomic_read(&sched_prio_record);
++
++	if (pr != task_prio) {
++		sched_preempt_mask_flush(mask, task_prio);
++		atomic_set(&sched_prio_record, task_prio);
++	}
++
++	return cpumask_and(preempt_mask, allow_mask, mask);
++}
++
 +static inline int select_task_rq(struct task_struct *p)
 +{
-+	cpumask_t chk_mask, tmp;
++	cpumask_t allow_mask, mask;
 +
-+	if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_active_mask)))
++	if (unlikely(!cpumask_and(&allow_mask, p->cpus_ptr, cpu_active_mask)))
 +		return select_fallback_rq(task_cpu(p), p);
 +
 +	if (
 +#ifdef CONFIG_SCHED_SMT
-+	    cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) ||
++	    cpumask_and(&mask, &allow_mask, &sched_sg_idle_mask) ||
 +#endif
-+	    cpumask_and(&tmp, &chk_mask, sched_rq_watermark) ||
-+	    cpumask_and(&tmp, &chk_mask,
-+			sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p)))
-+		return best_mask_cpu(task_cpu(p), &tmp);
++	    cpumask_and(&mask, &allow_mask, sched_idle_mask) ||
++	    preempt_mask_check(p, &allow_mask, &mask))
++		return best_mask_cpu(task_cpu(p), &mask);
 +
-+	return best_mask_cpu(task_cpu(p), &chk_mask);
++	return best_mask_cpu(task_cpu(p), &allow_mask);
 +}
 +
 +void sched_set_stop_task(int cpu, struct task_struct *stop)
@@ -4678,12 +4729,12 @@ index 000000000000..4bea0c025475
 +	 * find potential cpus which can migrate the current running task
 +	 */
 +	if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
-+	    cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) &&
++	    cpumask_andnot(&chk, cpu_online_mask, sched_idle_mask) &&
 +	    cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) {
 +		int i;
 +
 +		for_each_cpu_wrap(i, &chk, cpu) {
-+			if (cpumask_subset(cpu_smt_mask(i), &chk) &&
++			if (!cpumask_intersects(cpu_smt_mask(i), sched_idle_mask) &&\
 +			    sg_balance_trigger(i))
 +				return;
 +		}
@@ -4806,6 +4857,7 @@ index 000000000000..4bea0c025475
 +static void sched_tick_stop(int cpu)
 +{
 +	struct tick_work *twork;
++	int os;
 +
 +	if (housekeeping_cpu(cpu, HK_TYPE_TICK))
 +		return;
@@ -4813,7 +4865,10 @@ index 000000000000..4bea0c025475
 +	WARN_ON_ONCE(!tick_work_cpu);
 +
 +	twork = per_cpu_ptr(tick_work_cpu, cpu);
-+	cancel_delayed_work_sync(&twork->work);
++	/* There cannot be competing actions, but don't rely on stop-machine. */
++	os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING);
++	WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING);
++	/* Don't cancel, as this would mess up the state machine. */
 +}
 +#endif /* CONFIG_HOTPLUG_CPU */
 +
@@ -4988,7 +5043,7 @@ index 000000000000..4bea0c025475
 +{
 +	printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n",
 +	       sched_rq_pending_mask.bits[0],
-+	       sched_rq_watermark[0].bits[0],
++	       sched_idle_mask->bits[0],
 +	       sched_sg_idle_mask.bits[0]);
 +}
 +#else
@@ -5060,15 +5115,15 @@ index 000000000000..4bea0c025475
 +			if (src_rq->nr_running < 2)
 +				cpumask_clear_cpu(i, &sched_rq_pending_mask);
 +
++			spin_release(&src_rq->lock.dep_map, _RET_IP_);
++			do_raw_spin_unlock(&src_rq->lock);
++
 +			rq->nr_running += nr_migrated;
 +			if (rq->nr_running > 1)
 +				cpumask_set_cpu(cpu, &sched_rq_pending_mask);
 +
 +			cpufreq_update_util(rq, 0);
 +
-+			spin_release(&src_rq->lock.dep_map, _RET_IP_);
-+			do_raw_spin_unlock(&src_rq->lock);
-+
 +			return 1;
 +		}
@@ -5097,7 +5152,7 @@ index 000000000000..4bea0c025475
 +}
 +
 +static inline struct task_struct *
-+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev)
++choose_next_task(struct rq *rq, int cpu)
 +{
 +	struct task_struct *next;
 +
@@ -5254,7 +5309,7 @@ index 000000000000..4bea0c025475
 +		prev->sched_contributes_to_load =
 +			(prev_state & TASK_UNINTERRUPTIBLE) &&
 +			!(prev_state & TASK_NOLOAD) &&
-+			!(prev->flags & TASK_FROZEN);
++			!(prev_state & TASK_FROZEN);
 +
 +		if (prev->sched_contributes_to_load)
 +			rq->nr_uninterruptible++;
@@ -5284,7 +5339,7 @@ index 000000000000..4bea0c025475
 +
 +	check_curr(prev, rq);
 +
-+	next = choose_next_task(rq, cpu, prev);
++	next = choose_next_task(rq, cpu);
 +	clear_tsk_need_resched(prev);
 +	clear_preempt_need_resched();
 +#ifdef CONFIG_SCHED_DEBUG
@@ -5293,7 +5348,7 @@ index 000000000000..4bea0c025475
 +
 +	if (likely(prev != next)) {
 +		if (deactivated)
-+			update_sched_rq_watermark(rq);
++			update_sched_preempt_mask(rq);
 +		next->last_ran = rq->clock_task;
 +		rq->last_ts_switch = rq->clock;
 +
@@ -5714,6 +5769,7 @@ index 000000000000..4bea0c025475
 +		return;
 +
 +	rq = __task_access_lock(p, &lock);
++	update_rq_clock(rq);
 +	/*
 +	 * Set under pi_lock && rq->lock, such that the value can be used under
 +	 * either lock.
@@ -6593,6 +6649,13 @@ index 000000000000..4bea0c025475
 +	return retval;
 +}
 +
++#ifdef CONFIG_SMP
++int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
++{
++	return 0;
++}
++#endif
++
 +static int
 +__sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
 +{
@@ -7431,7 +7494,6 @@ index 000000000000..4bea0c025475
 +
 +	raw_spin_lock_irqsave(&idle->pi_lock, flags);
 +	raw_spin_lock(&rq->lock);
-+	update_rq_clock(rq);
 +
 +	idle->last_ran = rq->clock_task;
 +	idle->__state = TASK_RUNNING;
@@ -7978,6 +8040,14 @@ index 000000000000..4bea0c025475
 +
 +	sched_smp_initialized = true;
 +}
++
++static int __init migration_init(void)
++{
++	sched_cpu_starting(smp_processor_id());
++	return 0;
++}
++early_initcall(migration_init);
++
 +#else
 +void __init sched_init_smp(void)
 +{
@@ -8030,7 +8100,7 @@ index 000000000000..4bea0c025475
 +
 +#ifdef CONFIG_SMP
 +	for (i = 0; i < SCHED_QUEUE_BITS; i++)
-+		cpumask_copy(sched_rq_watermark + i, cpu_present_mask);
++		cpumask_copy(sched_preempt_mask + i, cpu_present_mask);
 +#endif
 +
 +#ifdef CONFIG_CGROUP_SCHED
@@ -8044,7 +8114,7 @@ index 000000000000..4bea0c025475
 +		rq = cpu_rq(i);
 +
 +		sched_queue_init(&rq->queue);
-+		rq->watermark = IDLE_TASK_SCHED_PRIO;
++		rq->prio = IDLE_TASK_SCHED_PRIO;
 +		rq->skip = NULL;
 +
 +		raw_spin_lock_init(&rq->lock);
@@ -8587,14 +8657,15 @@ index 000000000000..1212a031700e
 +{}
 diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h
 new file mode 100644
-index 000000000000..7c1cc0cbca0d
+index 000000000000..c32403ed82b6
 --- /dev/null
 +++ b/kernel/sched/alt_sched.h
-@@ -0,0 +1,660 @@
+@@ -0,0 +1,668 @@
 +#ifndef ALT_SCHED_H
 +#define ALT_SCHED_H
 +
 +#include <linux/context_tracking.h>
++#include <linux/profile.h>
 +#include <linux/psi.h>
 +#include <linux/stop_machine.h>
 +#include <linux/syscalls.h>
@@ -8732,7 +8803,7 @@ index 000000000000..7c1cc0cbca0d
 +#ifdef CONFIG_SCHED_PDS
 +	u64			time_edge;
 +#endif
-+	unsigned long watermark;
++	unsigned long prio;
 +
 +	/* switch count */
 +	u64 nr_switches;
@@ -8997,17 +9068,24 @@ index 000000000000..7c1cc0cbca0d
 +}
 +
 +static inline void
-+rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
++rq_unlock(struct rq *rq, struct rq_flags *rf)
 +	__releases(rq->lock)
 +{
-+	raw_spin_unlock_irq(&rq->lock);
++	raw_spin_unlock(&rq->lock);
 +}
 +
 +static inline void
-+rq_unlock(struct rq *rq, struct rq_flags *rf)
++rq_lock_irq(struct rq *rq, struct rq_flags *rf)
++	__acquires(rq->lock)
++{
++	raw_spin_lock_irq(&rq->lock);
++}
++
++static inline void
++rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
 +	__releases(rq->lock)
 +{
-+	raw_spin_unlock(&rq->lock);
++	raw_spin_unlock_irq(&rq->lock);
 +}
 +
 +static inline struct rq *
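
For context on the core change in this bump: BMQ v6.1-r0 eagerly maintained one cpumask per priority level (sched_rq_watermark[]), updating a range of masks on every runqueue watermark change. As the hunks around update_sched_preempt_mask() and preempt_mask_check() show, v6.1-r4 instead keeps a sched_preempt_mask[] array (slot 0 doubling as sched_idle_mask) plus an atomic sched_prio_record, and rebuilds only the one mask that a task-placement query actually needs, in sched_preempt_mask_flush(), when the recorded priority does not match. The sketch below is a minimal user-space model of that lazy rebuild, not kernel code: NR_CPUS, the uint64_t cpumask_t, the -1 sentinel for prio_record, and the absence of locking/atomics are all simplifying assumptions.

/*
 * Minimal sketch of BMQ's lazy preempt-mask scheme (illustrative only:
 * 8 CPUs, 64-bit toy cpumask, single-threaded, no atomics).
 * Lower numeric priority means higher scheduling priority, as in BMQ.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS          8
#define SCHED_QUEUE_BITS 64	/* one mask slot per priority level */

typedef uint64_t cpumask_t;	/* bit n == CPU n */

static int       rq_prio[NR_CPUS];		/* highest-priority level queued per CPU */
static cpumask_t preempt_mask[SCHED_QUEUE_BITS];/* slot 0 plays the sched_idle_mask role */
static int       prio_record = -1;		/* last priority a mask was flushed for */

/* Rebuild one slot: CPUs running at lower priority (bigger number) than
 * 'prio' could be preempted by a task at 'prio'. */
static void preempt_mask_flush(cpumask_t *mask, int prio)
{
	*mask = preempt_mask[0];	/* idle CPUs always qualify */
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (prio < rq_prio[cpu])
			*mask |= (cpumask_t)1 << cpu;
}

/* Which allowed CPUs could a task at 'prio' preempt right now?
 * Flush lazily, only when the recorded priority does not match. */
static cpumask_t preempt_mask_check(int prio, cpumask_t allowed)
{
	cpumask_t *mask = &preempt_mask[SCHED_QUEUE_BITS - 1 - prio];

	if (prio_record != prio) {
		preempt_mask_flush(mask, prio);
		prio_record = prio;
	}
	return allowed & *mask;
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		rq_prio[cpu] = SCHED_QUEUE_BITS - 1;	/* nothing queued anywhere */
	rq_prio[3] = 10;	/* CPU 3 is busy with a high-priority task */

	printf("preemptible CPUs: 0x%llx\n",
	       (unsigned long long)preempt_mask_check(20, 0xff));	/* -> 0xf7 */
	return 0;
}

Run standalone, this prints "preemptible CPUs: 0xf7": a priority-20 task may preempt every CPU except CPU 3, which runs at higher priority 10, and a second query at the same priority reuses the cached mask without a flush. That is the trade the patch makes: the r0 code paid for mask maintenance on every enqueue/dequeue, while r4 pays a single rebuild on the placement path that consumes the mask.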