[tip: sched/core] sched/debug: Rename the sched_debug parameter to sched_verbose
The following commit has been merged into the sched/core branch of tip: Commit-ID: 9406415f46f6127fd31bb66f0260f7a61a8d2786 Gitweb: https://git.kernel.org/tip/9406415f46f6127fd31bb66f0260f7a61a8d2786 Author:Peter Zijlstra AuthorDate:Thu, 15 Apr 2021 18:23:17 +02:00 Committer: Peter Zijlstra CommitterDate: Sat, 17 Apr 2021 13:22:44 +02:00 sched/debug: Rename the sched_debug parameter to sched_verbose CONFIG_SCHED_DEBUG is the build-time Kconfig knob, the boot param sched_debug and the /debug/sched/debug_enabled knobs control the sched_debug_enabled variable, but what they really do is make SCHED_DEBUG more verbose, so rename the lot. Signed-off-by: Peter Zijlstra (Intel) --- Documentation/admin-guide/kernel-parameters.txt | 2 +- Documentation/scheduler/sched-domains.rst | 10 +- kernel/sched/debug.c| 4 ++-- kernel/sched/sched.h| 2 +- kernel/sched/topology.c | 12 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0454572..9e4c026 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4725,7 +4725,7 @@ sbni= [NET] Granch SBNI12 leased line adapter - sched_debug [KNL] Enables verbose scheduler debug messages. + sched_verbose [KNL] Enables verbose scheduler debug messages. schedstats= [KNL,X86] Enable or disable scheduled statistics. Allowed values are enable and disable. This feature diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst index 8582fa5..14ea2f2 100644 --- a/Documentation/scheduler/sched-domains.rst +++ b/Documentation/scheduler/sched-domains.rst @@ -74,8 +74,8 @@ for a given topology level by creating a sched_domain_topology_level array and calling set_sched_topology() with this array as the parameter. 
The sched-domains debugging infrastructure can be enabled by enabling -CONFIG_SCHED_DEBUG and adding 'sched_debug' to your cmdline. If you forgot to -tweak your cmdline, you can also flip the /sys/kernel/debug/sched_debug -knob. This enables an error checking parse of the sched domains which should -catch most possible errors (described above). It also prints out the domain -structure in a visual format. +CONFIG_SCHED_DEBUG and adding 'sched_verbose' to your cmdline. If you +forgot to tweak your cmdline, you can also flip the +/sys/kernel/debug/sched/verbose knob. This enables an error checking parse of +the sched domains which should catch most possible errors (described above). It +also prints out the domain structure in a visual format. diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index bf199d6..461342f 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -275,7 +275,7 @@ static const struct file_operations sched_dynamic_fops = { #endif /* CONFIG_PREEMPT_DYNAMIC */ -__read_mostly bool sched_debug_enabled; +__read_mostly bool sched_debug_verbose; static const struct seq_operations sched_debug_sops; @@ -300,7 +300,7 @@ static __init int sched_init_debug(void) debugfs_sched = debugfs_create_dir("sched", NULL); debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); - debugfs_create_bool("debug_enabled", 0644, debugfs_sched, &sched_debug_enabled); + debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); #ifdef CONFIG_PREEMPT_DYNAMIC debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 55232db..bde7248 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2363,7 +2363,7 @@ extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); #ifdef CONFIG_SCHED_DEBUG -extern bool sched_debug_enabled; +extern bool sched_debug_verbose; extern 
void print_cfs_stats(struct seq_file *m, int cpu); extern void print_rt_stats(struct seq_file *m, int cpu); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index c343aed..55a0a24 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -14,15 +14,15 @@ static cpumask_var_t sched_domains_tmpmask2; static int __init sched_debug_setup(char *str) { - sched_debug_enabled = true; + sched_debug_verbose = true; return 0; } -early_param("sched_debug", sched_debug_setup); +early_param("sched_verbose", sched_debug_setup); static inline bool sched_debug(void) { - return sched_debug_enabled; + return sched_debug_verbose; } #define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, @@ -131,7 +131,7 @@ static void
[tip: sched/core] sched: Use cpu_dying() to fix balance_push vs hotplug-rollback
The following commit has been merged into the sched/core branch of tip: Commit-ID: b5c4477366fb5e6a2f0f38742c33acd666c07698 Gitweb: https://git.kernel.org/tip/b5c4477366fb5e6a2f0f38742c33acd666c07698 Author:Peter Zijlstra AuthorDate:Thu, 21 Jan 2021 16:09:32 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:32 +02:00 sched: Use cpu_dying() to fix balance_push vs hotplug-rollback Use the new cpu_dying() state to simplify and fix the balance_push() vs CPU hotplug rollback state. Specifically, we currently rely on notifiers sched_cpu_dying() / sched_cpu_activate() to terminate balance_push, however if the cpu_down() fails when we're past sched_cpu_deactivate(), it should terminate balance_push at that point and not wait until we hit sched_cpu_activate(). Similarly, when cpu_up() fails and we're going back down, balance_push should be active, where it currently is not. So instead, make sure balance_push is enabled below SCHED_AP_ACTIVE (when !cpu_active()), and gate its utility with cpu_dying(). Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/yhgayef83vqhk...@hirez.programming.kicks-ass.net --- kernel/sched/core.c | 26 +++--- kernel/sched/sched.h | 1 - 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 95bd6ab..7d031da 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1811,7 +1811,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) return cpu_online(cpu); /* Regular kernel threads don't get to stay during offline. */ - if (cpu_rq(cpu)->balance_push) + if (cpu_dying(cpu)) return false; /* But are allowed during online. */ @@ -7638,6 +7638,9 @@ static DEFINE_PER_CPU(struct cpu_stop_work, push_work); /* * Ensure we only run per-cpu kthreads once the CPU goes !active. + * + * This is enabled below SCHED_AP_ACTIVE; when !cpu_active(), but only + * effective when the hotplug motion is down. 
*/ static void balance_push(struct rq *rq) { @@ -7645,12 +7648,19 @@ static void balance_push(struct rq *rq) lockdep_assert_held(&rq->lock); SCHED_WARN_ON(rq->cpu != smp_processor_id()); + /* * Ensure the thing is persistent until balance_push_set(.on = false); */ rq->balance_callback = &balance_push_callback; /* +* Only active while going offline. +*/ + if (!cpu_dying(rq->cpu)) + return; + + /* * Both the cpu-hotplug and stop task are in this case and are * required to complete the hotplug process. * @@ -7703,7 +7713,6 @@ static void balance_push_set(int cpu, bool on) struct rq_flags rf; rq_lock_irqsave(rq, &rf); - rq->balance_push = on; if (on) { WARN_ON_ONCE(rq->balance_callback); rq->balance_callback = &balance_push_callback; @@ -7828,8 +7837,8 @@ int sched_cpu_activate(unsigned int cpu) struct rq_flags rf; /* -* Make sure that when the hotplug state machine does a roll-back -* we clear balance_push. Ideally that would happen earlier... +* Clear the balance_push callback and prepare to schedule +* regular tasks. */ balance_push_set(cpu, false); @@ -8014,12 +8023,6 @@ int sched_cpu_dying(unsigned int cpu) } rq_unlock_irqrestore(rq, &rf); - /* -* Now that the CPU is offline, make sure we're welcome -* to new tasks once we come back up. 
-*/ - balance_push_set(cpu, false); - calc_load_migrate(rq); update_max_interval(); hrtick_clear(rq); @@ -8204,7 +8207,7 @@ void __init sched_init(void) rq->sd = NULL; rq->rd = NULL; rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE; - rq->balance_callback = NULL; + rq->balance_callback = &balance_push_callback; rq->active_balance = 0; rq->next_balance = jiffies; rq->push_cpu = 0; @@ -8251,6 +8254,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP idle_thread_set_boot_cpu(); + balance_push_set(smp_processor_id(), false); #endif init_sched_fair_class(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index cbb0b01..7e7e936 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -983,7 +983,6 @@ struct rq { unsigned long cpu_capacity_orig; struct callback_head*balance_callback; - unsigned char balance_push; unsigned char nohz_idle_balance; unsigned char idle_balance;
[tip: sched/core] cpumask: Make cpu_{online,possible,present,active}() inline
The following commit has been merged into the sched/core branch of tip: Commit-ID: b02a4fd8148f655095d9e3d6eddd8f0042bcc27c Gitweb: https://git.kernel.org/tip/b02a4fd8148f655095d9e3d6eddd8f0042bcc27c Author:Peter Zijlstra AuthorDate:Mon, 25 Jan 2021 16:46:49 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:32 +02:00 cpumask: Make cpu_{online,possible,present,active}() inline Prepare for addition of another mask. Primarily a code movement to avoid having to create more #ifdef, but while there, convert everything with an argument to an inline function. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210310150109.045447...@infradead.org --- include/linux/cpumask.h | 97 +++- 1 file changed, 66 insertions(+), 31 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 383684e..a584336 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -98,37 +98,6 @@ extern struct cpumask __cpu_active_mask; extern atomic_t __num_online_cpus; -#if NR_CPUS > 1 -/** - * num_online_cpus() - Read the number of online CPUs - * - * Despite the fact that __num_online_cpus is of type atomic_t, this - * interface gives only a momentary snapshot and is not protected against - * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held - * region. 
- */ -static inline unsigned int num_online_cpus(void) -{ - return atomic_read(&__num_online_cpus); -} -#define num_possible_cpus()cpumask_weight(cpu_possible_mask) -#define num_present_cpus() cpumask_weight(cpu_present_mask) -#define num_active_cpus() cpumask_weight(cpu_active_mask) -#define cpu_online(cpu)cpumask_test_cpu((cpu), cpu_online_mask) -#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) -#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) -#define cpu_active(cpu)cpumask_test_cpu((cpu), cpu_active_mask) -#else -#define num_online_cpus() 1U -#define num_possible_cpus()1U -#define num_present_cpus() 1U -#define num_active_cpus() 1U -#define cpu_online(cpu)((cpu) == 0) -#define cpu_possible(cpu) ((cpu) == 0) -#define cpu_present(cpu) ((cpu) == 0) -#define cpu_active(cpu)((cpu) == 0) -#endif - extern cpumask_t cpus_booted_once_mask; static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) @@ -894,6 +863,72 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) return to_cpumask(p); } +#if NR_CPUS > 1 +/** + * num_online_cpus() - Read the number of online CPUs + * + * Despite the fact that __num_online_cpus is of type atomic_t, this + * interface gives only a momentary snapshot and is not protected against + * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held + * region. 
+ */ +static inline unsigned int num_online_cpus(void) +{ + return atomic_read(&__num_online_cpus); +} +#define num_possible_cpus()cpumask_weight(cpu_possible_mask) +#define num_present_cpus() cpumask_weight(cpu_present_mask) +#define num_active_cpus() cpumask_weight(cpu_active_mask) + +static inline bool cpu_online(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_online_mask); +} + +static inline bool cpu_possible(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_possible_mask); +} + +static inline bool cpu_present(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_present_mask); +} + +static inline bool cpu_active(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_active_mask); +} + +#else + +#define num_online_cpus() 1U +#define num_possible_cpus()1U +#define num_present_cpus() 1U +#define num_active_cpus() 1U + +static inline bool cpu_online(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_possible(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_present(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_active(unsigned int cpu) +{ + return cpu == 0; +} + +#endif /* NR_CPUS > 1 */ + #define cpu_is_offline(cpu)unlikely(!cpu_online(cpu)) #if NR_CPUS <= BITS_PER_LONG
[tip: sched/core] cpumask: Introduce DYING mask
The following commit has been merged into the sched/core branch of tip: Commit-ID: e40f74c535b8a0ecf3ef0388b51a34cdadb34fb5 Gitweb: https://git.kernel.org/tip/e40f74c535b8a0ecf3ef0388b51a34cdadb34fb5 Author:Peter Zijlstra AuthorDate:Tue, 19 Jan 2021 18:43:45 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:32 +02:00 cpumask: Introduce DYING mask Introduce a cpumask that indicates (for each CPU) what direction the CPU hotplug is currently going. Notably, it tracks rollbacks. Eg. when an up fails and we do a roll-back down, it will accurately reflect the direction. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210310150109.151441...@infradead.org --- include/linux/cpumask.h | 20 kernel/cpu.c| 6 ++ 2 files changed, 26 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index a584336..e6b948a 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -91,10 +91,12 @@ extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; +extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) +#define cpu_dying_mask((const struct cpumask *)&__cpu_dying_mask) extern atomic_t __num_online_cpus; @@ -826,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active) cpumask_clear_cpu(cpu, &__cpu_active_mask); } +static inline void +set_cpu_dying(unsigned int cpu, bool dying) +{ + if (dying) + cpumask_set_cpu(cpu, &__cpu_dying_mask); + else + cpumask_clear_cpu(cpu, &__cpu_dying_mask); +} /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * @@ -900,6 +910,11 @@ static inline bool 
cpu_active(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_active_mask); } +static inline bool cpu_dying(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_dying_mask); +} + #else #define num_online_cpus() 1U @@ -927,6 +942,11 @@ static inline bool cpu_active(unsigned int cpu) return cpu == 0; } +static inline bool cpu_dying(unsigned int cpu) +{ + return false; +} + #endif /* NR_CPUS > 1 */ #define cpu_is_offline(cpu)unlikely(!cpu_online(cpu)) diff --git a/kernel/cpu.c b/kernel/cpu.c index 23505d6..838dcf2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -160,6 +160,9 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, int (*cb)(unsigned int cpu); int ret, cnt; + if (cpu_dying(cpu) != !bringup) + set_cpu_dying(cpu, !bringup); + if (st->fail == state) { st->fail = CPUHP_INVALID; return -EAGAIN; @@ -2512,6 +2515,9 @@ EXPORT_SYMBOL(__cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); +struct cpumask __cpu_dying_mask __read_mostly; +EXPORT_SYMBOL(__cpu_dying_mask); + atomic_t __num_online_cpus __read_mostly; EXPORT_SYMBOL(__num_online_cpus);
[tip: sched/core] sched: Move SCHED_DEBUG sysctl to debugfs
The following commit has been merged into the sched/core branch of tip: Commit-ID: 8a99b6833c884fa0e7919030d93fecedc69fc625 Gitweb: https://git.kernel.org/tip/8a99b6833c884fa0e7919030d93fecedc69fc625 Author:Peter Zijlstra AuthorDate:Wed, 24 Mar 2021 11:43:21 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:34 +02:00 sched: Move SCHED_DEBUG sysctl to debugfs Stop polluting sysctl with undocumented knobs that really are debug only, move them all to /debug/sched/ along with the existing /debug/sched_* files that already exist. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.287610...@infradead.org --- include/linux/sched/sysctl.h | 8 +-- kernel/sched/core.c | 4 +- kernel/sched/debug.c | 74 +-- kernel/sched/fair.c | 9 +--- kernel/sched/sched.h | 2 +- kernel/sysctl.c | 96 +--- 6 files changed, 80 insertions(+), 113 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 3c31ba8..0a3f346 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, enum { sysctl_hung_task_timeout_secs = 0 }; #endif +extern unsigned int sysctl_sched_child_runs_first; + extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_child_runs_first; enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, @@ -37,7 +38,7 @@ enum sched_tunable_scaling { SCHED_TUNABLESCALING_LINEAR, SCHED_TUNABLESCALING_END, }; -extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; +extern unsigned int sysctl_sched_tunable_scaling; extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_period_min; @@ -47,9 +48,6 @@ extern unsigned int sysctl_numa_balancing_scan_size; #ifdef 
CONFIG_SCHED_DEBUG extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; - -int sched_proc_update_handler(struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos); #endif /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7d031da..bac30db 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5504,9 +5504,11 @@ static const struct file_operations sched_dynamic_fops = { .release= single_release, }; +extern struct dentry *debugfs_sched; + static __init int sched_init_debug_dynamic(void) { - debugfs_create_file("sched_preempt", 0644, NULL, NULL, _dynamic_fops); + debugfs_create_file("sched_preempt", 0644, debugfs_sched, NULL, _dynamic_fops); return 0; } late_initcall(sched_init_debug_dynamic); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 4b49cc2..2093b90 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -169,15 +169,81 @@ static const struct file_operations sched_feat_fops = { .release= single_release, }; +#ifdef CONFIG_SMP + +static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[16]; + + if (cnt > 15) + cnt = 15; + + if (copy_from_user(, ubuf, cnt)) + return -EFAULT; + + if (kstrtouint(buf, 10, _sched_tunable_scaling)) + return -EINVAL; + + if (sched_update_scaling()) + return -EINVAL; + + *ppos += cnt; + return cnt; +} + +static int sched_scaling_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", sysctl_sched_tunable_scaling); + return 0; +} + +static int sched_scaling_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_scaling_show, NULL); +} + +static const struct file_operations sched_scaling_fops = { + .open = sched_scaling_open, + .write = sched_scaling_write, + .read = seq_read, + .llseek = seq_lseek, + .release= single_release, +}; + +#endif /* SMP */ + __read_mostly bool sched_debug_enabled; +struct dentry 
*debugfs_sched; + static __init int sched_init_debug(void) { - debugfs_create_file("sched_features", 0644, NULL, NULL, - &sched_feat_fops); + struct dentry __maybe_unused *numa; - debugfs_create_bool("sched_debug", 0644, NULL, - &sched_debug_enabled); + debugfs_sched = debugfs_create_dir("sched", NULL); + + debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); + debugfs_create_bool("debug_enabled", 0644,
[tip: sched/core] sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG
The following commit has been merged into the sched/core branch of tip: Commit-ID: 1d1c2509de4488cc58c924d0a6117c62de1d4f9c Gitweb: https://git.kernel.org/tip/1d1c2509de4488cc58c924d0a6117c62de1d4f9c Author:Peter Zijlstra AuthorDate:Wed, 24 Mar 2021 19:47:43 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:33 +02:00 sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG CONFIG_SCHEDSTATS does not depend on SCHED_DEBUG, it is inconsistent to have the sysctl depend on it. Suggested-by: Mel Gorman Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.161151...@infradead.org --- kernel/sysctl.c | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8042098..17f1cc9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1711,17 +1711,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, -#ifdef CONFIG_SCHEDSTATS - { - .procname = "sched_schedstats", - .data = NULL, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = sysctl_schedstats, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SCHEDSTATS */ #endif /* CONFIG_SMP */ #ifdef CONFIG_NUMA_BALANCING { @@ -1755,6 +1744,17 @@ static struct ctl_table kern_table[] = { }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ +#ifdef CONFIG_SCHEDSTATS + { + .procname = "sched_schedstats", + .data = NULL, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_schedstats, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_NUMA_BALANCING { .procname = "numa_balancing",
[tip: sched/core] sched,preempt: Move preempt_dynamic to debug.c
The following commit has been merged into the sched/core branch of tip: Commit-ID: 1011dcce99f8026d48fdd7b9cc259e32a8b472be Gitweb: https://git.kernel.org/tip/1011dcce99f8026d48fdd7b9cc259e32a8b472be Author:Peter Zijlstra AuthorDate:Thu, 25 Mar 2021 12:21:38 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:34 +02:00 sched,preempt: Move preempt_dynamic to debug.c Move the #ifdef SCHED_DEBUG bits to kernel/sched/debug.c in order to collect all the debugfs bits. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.353833...@infradead.org --- kernel/sched/core.c | 77 +-- kernel/sched/debug.c | 67 - kernel/sched/sched.h | 11 -- 3 files changed, 78 insertions(+), 77 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bac30db..e6c714b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5371,9 +5371,9 @@ enum { preempt_dynamic_full, }; -static int preempt_dynamic_mode = preempt_dynamic_full; +int preempt_dynamic_mode = preempt_dynamic_full; -static int sched_dynamic_mode(const char *str) +int sched_dynamic_mode(const char *str) { if (!strcmp(str, "none")) return preempt_dynamic_none; @@ -5387,7 +5387,7 @@ static int sched_dynamic_mode(const char *str) return -EINVAL; } -static void sched_dynamic_update(int mode) +void sched_dynamic_update(int mode) { /* * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in @@ -5444,79 +5444,8 @@ static int __init setup_preempt_mode(char *str) } __setup("preempt=", setup_preempt_mode); -#ifdef CONFIG_SCHED_DEBUG - -static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[16]; - int mode; - - if (cnt > 15) - cnt = 15; - - if (copy_from_user(, ubuf, cnt)) - return -EFAULT; - - buf[cnt] = 0; - mode = sched_dynamic_mode(strstrip(buf)); - if (mode < 0) - return mode; - - sched_dynamic_update(mode); - - *ppos += cnt; - - return cnt; -} - -static int 
sched_dynamic_show(struct seq_file *m, void *v) -{ - static const char * preempt_modes[] = { - "none", "voluntary", "full" - }; - int i; - - for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { - if (preempt_dynamic_mode == i) - seq_puts(m, "("); - seq_puts(m, preempt_modes[i]); - if (preempt_dynamic_mode == i) - seq_puts(m, ")"); - - seq_puts(m, " "); - } - - seq_puts(m, "\n"); - return 0; -} - -static int sched_dynamic_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, sched_dynamic_show, NULL); -} - -static const struct file_operations sched_dynamic_fops = { - .open = sched_dynamic_open, - .write = sched_dynamic_write, - .read = seq_read, - .llseek = seq_lseek, - .release= single_release, -}; - -extern struct dentry *debugfs_sched; - -static __init int sched_init_debug_dynamic(void) -{ - debugfs_create_file("sched_preempt", 0644, debugfs_sched, NULL, _dynamic_fops); - return 0; -} -late_initcall(sched_init_debug_dynamic); - -#endif /* CONFIG_SCHED_DEBUG */ #endif /* CONFIG_PREEMPT_DYNAMIC */ - /* * This is the entry point to schedule() from kernel preemption * off of irq context. 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2093b90..bdd344f 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -213,9 +213,71 @@ static const struct file_operations sched_scaling_fops = { #endif /* SMP */ +#ifdef CONFIG_PREEMPT_DYNAMIC + +static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[16]; + int mode; + + if (cnt > 15) + cnt = 15; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + mode = sched_dynamic_mode(strstrip(buf)); + if (mode < 0) + return mode; + + sched_dynamic_update(mode); + + *ppos += cnt; + + return cnt; +} + +static int sched_dynamic_show(struct seq_file *m, void *v) +{ + static const char * preempt_modes[] = { + "none", "voluntary", "full" + }; + int i; + + for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { + if (preempt_dynamic_mode == i) + seq_puts(m, "("); + seq_puts(m, preempt_modes[i]); + if (preempt_dynamic_mode == i) + seq_puts(m, ")"); + + seq_puts(m, "
[tip: sched/core] sched: Don't make LATENCYTOP select SCHED_DEBUG
The following commit has been merged into the sched/core branch of tip: Commit-ID: d86ba831656611872e4939b895503ddac63d8196 Gitweb: https://git.kernel.org/tip/d86ba831656611872e4939b895503ddac63d8196 Author:Peter Zijlstra AuthorDate:Wed, 24 Mar 2021 19:48:34 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:33 +02:00 sched: Don't make LATENCYTOP select SCHED_DEBUG SCHED_DEBUG is not in fact required for LATENCYTOP, don't select it. Suggested-by: Mel Gorman Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.224578...@infradead.org --- lib/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2779c29..5f98376 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1670,7 +1670,6 @@ config LATENCYTOP select KALLSYMS_ALL select STACKTRACE select SCHEDSTATS - select SCHED_DEBUG help Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations.
[tip: sched/core] sched,fair: Alternative sched_slice()
The following commit has been merged into the sched/core branch of tip: Commit-ID: 0c2de3f054a59f15e01804b75a04355c48de628c Gitweb: https://git.kernel.org/tip/0c2de3f054a59f15e01804b75a04355c48de628c Author:Peter Zijlstra AuthorDate:Thu, 25 Mar 2021 13:44:46 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00 sched,fair: Alternative sched_slice() The current sched_slice() seems to have issues; there's two possible things that could be improved: - the 'nr_running' used for __sched_period() is daft when cgroups are considered. Using the RQ wide h_nr_running seems like a much more consistent number. - (esp) cgroups can slice it real fine, which makes for easy over-scheduling, ensure min_gran is what the name says. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.611897...@infradead.org --- kernel/sched/fair.c | 12 +++- kernel/sched/features.h | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b3ea14c..49636a4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -687,7 +687,13 @@ static u64 __sched_period(unsigned long nr_running) */ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) { - u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); + unsigned int nr_running = cfs_rq->nr_running; + u64 slice; + + if (sched_feat(ALT_PERIOD)) + nr_running = rq_of(cfs_rq)->cfs.h_nr_running; + + slice = __sched_period(nr_running + !se->on_rq); for_each_sched_entity(se) { struct load_weight *load; @@ -704,6 +710,10 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) } slice = __calc_delta(slice, se->load.weight, load); } + + if (sched_feat(BASE_SLICE)) + slice = max(slice, (u64)sysctl_sched_min_granularity); + return slice; } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 422fa68..011c5ec 100644 --- a/kernel/sched/features.h +++ 
b/kernel/sched/features.h @@ -90,3 +90,6 @@ SCHED_FEAT(WA_BIAS, true) */ SCHED_FEAT(UTIL_EST, true) SCHED_FEAT(UTIL_EST_FASTUP, true) + +SCHED_FEAT(ALT_PERIOD, true) +SCHED_FEAT(BASE_SLICE, true)
[tip: sched/core] sched/debug: Rename the sched_debug parameter to sched_verbose
The following commit has been merged into the sched/core branch of tip: Commit-ID: a1b93fc0377e73dd54f819a993f83291324bb54a Gitweb: https://git.kernel.org/tip/a1b93fc0377e73dd54f819a993f83291324bb54a Author:Peter Zijlstra AuthorDate:Thu, 15 Apr 2021 18:23:17 +02:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00 sched/debug: Rename the sched_debug parameter to sched_verbose CONFIG_SCHED_DEBUG is the build-time Kconfig knob, the boot param sched_debug and the /debug/sched/debug_enabled knobs control the sched_debug_enabled variable, but what they really do is make SCHED_DEBUG more verbose, so rename the lot. Signed-off-by: Peter Zijlstra (Intel) --- Documentation/admin-guide/kernel-parameters.txt | 2 +- Documentation/scheduler/sched-domains.rst | 10 +- kernel/sched/debug.c| 4 ++-- kernel/sched/topology.c | 12 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0454572..9e4c026 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4725,7 +4725,7 @@ sbni= [NET] Granch SBNI12 leased line adapter - sched_debug [KNL] Enables verbose scheduler debug messages. + sched_verbose [KNL] Enables verbose scheduler debug messages. schedstats= [KNL,X86] Enable or disable scheduled statistics. Allowed values are enable and disable. This feature diff --git a/Documentation/scheduler/sched-domains.rst b/Documentation/scheduler/sched-domains.rst index 8582fa5..14ea2f2 100644 --- a/Documentation/scheduler/sched-domains.rst +++ b/Documentation/scheduler/sched-domains.rst @@ -74,8 +74,8 @@ for a given topology level by creating a sched_domain_topology_level array and calling set_sched_topology() with this array as the parameter. The sched-domains debugging infrastructure can be enabled by enabling -CONFIG_SCHED_DEBUG and adding 'sched_debug' to your cmdline. 
If you forgot to -tweak your cmdline, you can also flip the /sys/kernel/debug/sched_debug -knob. This enables an error checking parse of the sched domains which should -catch most possible errors (described above). It also prints out the domain -structure in a visual format. +CONFIG_SCHED_DEBUG and adding 'sched_verbose' to your cmdline. If you +forgot to tweak your cmdline, you can also flip the +/sys/kernel/debug/sched/verbose knob. This enables an error checking parse of +the sched domains which should catch most possible errors (described above). It +also prints out the domain structure in a visual format. diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index bf199d6..461342f 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -275,7 +275,7 @@ static const struct file_operations sched_dynamic_fops = { #endif /* CONFIG_PREEMPT_DYNAMIC */ -__read_mostly bool sched_debug_enabled; +__read_mostly bool sched_debug_verbose; static const struct seq_operations sched_debug_sops; @@ -300,7 +300,7 @@ static __init int sched_init_debug(void) debugfs_sched = debugfs_create_dir("sched", NULL); debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); - debugfs_create_bool("debug_enabled", 0644, debugfs_sched, &sched_debug_enabled); + debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); #ifdef CONFIG_PREEMPT_DYNAMIC debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); #endif diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index c343aed..55a0a24 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -14,15 +14,15 @@ static cpumask_var_t sched_domains_tmpmask2; static int __init sched_debug_setup(char *str) { - sched_debug_enabled = true; + sched_debug_verbose = true; return 0; } -early_param("sched_debug", sched_debug_setup); +early_param("sched_verbose", sched_debug_setup); static inline bool sched_debug(void) { - return sched_debug_enabled; + return sched_debug_verbose; } 
#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, @@ -131,7 +131,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) { int level = 0; - if (!sched_debug_enabled) + if (!sched_debug_verbose) return; if (!sd) { @@ -152,7 +152,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) } #else /* !CONFIG_SCHED_DEBUG */ -# define sched_debug_enabled 0 +# define sched_debug_verbose 0 # define sched_domain_debug(sd, cpu) do { } while (0) static inline bool sched_debug(void) { @@ -2141,7 +2141,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att if
[tip: sched/core] sched,debug: Convert sysctl sched_domains to debugfs
The following commit has been merged into the sched/core branch of tip: Commit-ID: 3b87f136f8fccddf7da016ab7d04bb3cf9b180f0 Gitweb: https://git.kernel.org/tip/3b87f136f8fccddf7da016ab7d04bb3cf9b180f0 Author:Peter Zijlstra AuthorDate:Thu, 25 Mar 2021 11:31:20 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00 sched,debug: Convert sysctl sched_domains to debugfs Stop polluting sysctl, move to debugfs for SCHED_DEBUG stuff. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dietmar Eggemann Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/YHgB/s4kcbq1i...@hirez.programming.kicks-ass.net --- kernel/sched/debug.c| 254 --- kernel/sched/sched.h| 10 +-- kernel/sched/topology.c | 6 +- 3 files changed, 59 insertions(+), 211 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index bdd344f..b25de7b 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -299,6 +299,10 @@ static __init int sched_init_debug(void) debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, _scaling_fops); debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, _sched_migration_cost); debugfs_create_u32("nr_migrate", 0644, debugfs_sched, _sched_nr_migrate); + + mutex_lock(_domains_mutex); + update_sched_domain_debugfs(); + mutex_unlock(_domains_mutex); #endif #ifdef CONFIG_NUMA_BALANCING @@ -316,229 +320,88 @@ late_initcall(sched_init_debug); #ifdef CONFIG_SMP -#ifdef CONFIG_SYSCTL - -static struct ctl_table sd_ctl_dir[] = { - { - .procname = "sched_domain", - .mode = 0555, - }, - {} -}; - -static struct ctl_table sd_ctl_root[] = { - { - .procname = "kernel", - .mode = 0555, - .child = sd_ctl_dir, - }, - {} -}; - -static struct ctl_table *sd_alloc_ctl_entry(int n) -{ - struct ctl_table *entry = - kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL); - - return entry; -} - -static void sd_free_ctl_entry(struct ctl_table **tablep) -{ - struct ctl_table *entry; - - /* -* In the intermediate 
directories, both the child directory and -* procname are dynamically allocated and could fail but the mode -* will always be set. In the lowest directory the names are -* static strings and all have proc handlers. -*/ - for (entry = *tablep; entry->mode; entry++) { - if (entry->child) - sd_free_ctl_entry(>child); - if (entry->proc_handler == NULL) - kfree(entry->procname); - } - - kfree(*tablep); - *tablep = NULL; -} - -static void -set_table_entry(struct ctl_table *entry, - const char *procname, void *data, int maxlen, - umode_t mode, proc_handler *proc_handler) -{ - entry->procname = procname; - entry->data = data; - entry->maxlen = maxlen; - entry->mode = mode; - entry->proc_handler = proc_handler; -} +static cpumask_var_t sd_sysctl_cpus; +static struct dentry *sd_dentry; -static int sd_ctl_doflags(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static int sd_flags_show(struct seq_file *m, void *v) { - unsigned long flags = *(unsigned long *)table->data; - size_t data_size = 0; - size_t len = 0; - char *tmp, *buf; + unsigned long flags = *(unsigned int *)m->private; int idx; - if (write) - return 0; - - for_each_set_bit(idx, , __SD_FLAG_CNT) { - char *name = sd_flag_debug[idx].name; - - /* Name plus whitespace */ - data_size += strlen(name) + 1; - } - - if (*ppos > data_size) { - *lenp = 0; - return 0; - } - - buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; - for_each_set_bit(idx, , __SD_FLAG_CNT) { - char *name = sd_flag_debug[idx].name; - - len += snprintf(buf + len, strlen(name) + 2, "%s ", name); - } - - tmp = buf + *ppos; - len -= *ppos; - - if (len > *lenp) - len = *lenp; - if (len) - memcpy(buffer, tmp, len); - if (len < *lenp) { - ((char *)buffer)[len] = '\n'; - len++; + seq_puts(m, sd_flag_debug[idx].name); + seq_puts(m, " "); } - - *lenp = len; - *ppos += len; - - kfree(buf); + seq_puts(m, "\n"); return 0; } -static struct ctl_table * -sd_alloc_ctl_domain_table(struct sched_domain 
*sd) -{ - struct ctl_table *table =
[tip: sched/core] debugfs: Implement debugfs_create_str()
The following commit has been merged into the sched/core branch of tip: Commit-ID: 9af0440ec86ebdab075e1b3d231f81fe7decb575 Gitweb: https://git.kernel.org/tip/9af0440ec86ebdab075e1b3d231f81fe7decb575 Author:Peter Zijlstra AuthorDate:Thu, 25 Mar 2021 10:53:55 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:34 +02:00 debugfs: Implement debugfs_create_str() Implement debugfs_create_str() to easily display names and such in debugfs. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.415407...@infradead.org --- fs/debugfs/file.c | 91 - include/linux/debugfs.h | 17 +++- 2 files changed, 108 insertions(+) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 686e0ad..9b78e9e 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -865,6 +865,97 @@ struct dentry *debugfs_create_bool(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_bool); +ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct dentry *dentry = F_DENTRY(file); + char *str, *copy = NULL; + int copy_len, len; + ssize_t ret; + + ret = debugfs_file_get(dentry); + if (unlikely(ret)) + return ret; + + str = *(char **)file->private_data; + len = strlen(str) + 1; + copy = kmalloc(len, GFP_KERNEL); + if (!copy) { + debugfs_file_put(dentry); + return -ENOMEM; + } + + copy_len = strscpy(copy, str, len); + debugfs_file_put(dentry); + if (copy_len < 0) { + kfree(copy); + return copy_len; + } + + copy[copy_len] = '\n'; + + ret = simple_read_from_buffer(user_buf, count, ppos, copy, copy_len); + kfree(copy); + + return ret; +} + +static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* This is really only for read-only strings */ + return -EINVAL; +} + +static const struct file_operations fops_str = { + .read = debugfs_read_file_str, + .write 
=debugfs_write_file_str, + .open = simple_open, + .llseek = default_llseek, +}; + +static const struct file_operations fops_str_ro = { + .read = debugfs_read_file_str, + .open = simple_open, + .llseek = default_llseek, +}; + +static const struct file_operations fops_str_wo = { + .write =debugfs_write_file_str, + .open = simple_open, + .llseek = default_llseek, +}; + +/** + * debugfs_create_str - create a debugfs file that is used to read and write a string value + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @value: a pointer to the variable that the file should read to and write + * from. + * + * This function creates a file in debugfs with the given name that + * contains the value of the variable @value. If the @mode variable is so + * set, it can be read from, and written to. + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the debugfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be + * returned. + * + * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will + * be returned. 
+ */ +void debugfs_create_str(const char *name, umode_t mode, + struct dentry *parent, char **value) +{ + debugfs_create_mode_unsafe(name, mode, parent, value, _str, + _str_ro, _str_wo); +} + static ssize_t read_file_blob(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index d6c4cc9..1fdb434 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value); struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value); +void debugfs_create_str(const char *name, umode_t mode, + struct dentry *parent, char **value); struct dentry *debugfs_create_blob(const
[tip: sched/core] sched: Move /proc/sched_debug to debugfs
The following commit has been merged into the sched/core branch of tip: Commit-ID: d27e9ae2f244805bbdc730d85fba28685d2471e5 Gitweb: https://git.kernel.org/tip/d27e9ae2f244805bbdc730d85fba28685d2471e5 Author:Peter Zijlstra AuthorDate:Thu, 25 Mar 2021 15:18:19 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00 sched: Move /proc/sched_debug to debugfs Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Greg Kroah-Hartman Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210412102001.548833...@infradead.org --- kernel/sched/debug.c | 25 - 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index b25de7b..bf199d6 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -277,6 +277,20 @@ static const struct file_operations sched_dynamic_fops = { __read_mostly bool sched_debug_enabled; +static const struct seq_operations sched_debug_sops; + +static int sched_debug_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, _debug_sops); +} + +static const struct file_operations sched_debug_fops = { + .open = sched_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release= seq_release, +}; + static struct dentry *debugfs_sched; static __init int sched_init_debug(void) @@ -314,6 +328,8 @@ static __init int sched_init_debug(void) debugfs_create_u32("scan_size_mb", 0644, numa, _numa_balancing_scan_size); #endif + debugfs_create_file("debug", 0444, debugfs_sched, NULL, _debug_fops); + return 0; } late_initcall(sched_init_debug); @@ -847,15 +863,6 @@ static const struct seq_operations sched_debug_sops = { .show = sched_debug_show, }; -static int __init init_sched_debug_procfs(void) -{ - if (!proc_create_seq("sched_debug", 0444, NULL, _debug_sops)) - return -ENOMEM; - return 0; -} - -__initcall(init_sched_debug_procfs); - #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F)) #define __P(F) __PS(#F, F) #define P(F) __PS(#F, p->F)
[tip: perf/core] perf: Rework perf_event_exit_event()
The following commit has been merged into the perf/core branch of tip: Commit-ID: ef54c1a476aef7eef26fe13ea10dc090952c00f8 Gitweb: https://git.kernel.org/tip/ef54c1a476aef7eef26fe13ea10dc090952c00f8 Author: Peter Zijlstra AuthorDate: Thu, 08 Apr 2021 12:35:56 +02:00 Committer: Peter Zijlstra CommitterDate: Fri, 16 Apr 2021 16:32:40 +02:00 perf: Rework perf_event_exit_event() Make perf_event_exit_event() more robust, such that we can use it from other contexts. Specifically the up and coming remove_on_exec. For this to work we need to address a few issues. Remove_on_exec will not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to disable event_function_call() and we thus have to use perf_remove_from_context(). When using perf_remove_from_context(), there are two races to consider. The first is against close(), where we can have concurrent tear-down of the event. The second is against child_list iteration, which should not find a half-baked event. To address this, teach perf_remove_from_context() to special-case !ctx->is_active and to handle DETACH_CHILD. [ el...@google.com: fix racing parent/child exit in sync_child_event(). 
] Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210408103605.1676875-2-el...@google.com --- include/linux/perf_event.h | 1 +- kernel/events/core.c | 142 2 files changed, 80 insertions(+), 63 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3f7f89e..3d478ab 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -607,6 +607,7 @@ struct swevent_hlist { #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 #define PERF_ATTACH_SCHED_CB 0x20 +#define PERF_ATTACH_CHILD 0x40 struct perf_cgroup; struct perf_buffer; diff --git a/kernel/events/core.c b/kernel/events/core.c index f079431..318ff7b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2205,6 +2205,26 @@ out: perf_event__header_size(leader); } +static void sync_child_event(struct perf_event *child_event); + +static void perf_child_detach(struct perf_event *event) +{ + struct perf_event *parent_event = event->parent; + + if (!(event->attach_state & PERF_ATTACH_CHILD)) + return; + + event->attach_state &= ~PERF_ATTACH_CHILD; + + if (WARN_ON_ONCE(!parent_event)) + return; + + lockdep_assert_held(_event->child_mutex); + + sync_child_event(event); + list_del_init(>child_list); +} + static bool is_orphaned_event(struct perf_event *event) { return event->state == PERF_EVENT_STATE_DEAD; @@ -2312,6 +2332,7 @@ group_sched_out(struct perf_event *group_event, } #define DETACH_GROUP 0x01UL +#define DETACH_CHILD 0x02UL /* * Cross CPU call to remove a performance event @@ -2335,6 +2356,8 @@ __perf_remove_from_context(struct perf_event *event, event_sched_out(event, cpuctx, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); + if (flags & DETACH_CHILD) + perf_child_detach(event); list_del_event(event, ctx); if (!ctx->nr_events && ctx->is_active) { @@ -2363,25 +2386,21 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla lockdep_assert_held(>mutex); - 
event_function_call(event, __perf_remove_from_context, (void *)flags); - /* -* The above event_function_call() can NO-OP when it hits -* TASK_TOMBSTONE. In that case we must already have been detached -* from the context (by perf_event_exit_event()) but the grouping -* might still be in-tact. +* Because of perf_event_exit_task(), perf_remove_from_context() ought +* to work in the face of TASK_TOMBSTONE, unlike every other +* event_function_call() user. */ - WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); - if ((flags & DETACH_GROUP) && - (event->attach_state & PERF_ATTACH_GROUP)) { - /* -* Since in that case we cannot possibly be scheduled, simply -* detach now. -*/ - raw_spin_lock_irq(>lock); - perf_group_detach(event); + raw_spin_lock_irq(>lock); + if (!ctx->is_active) { + __perf_remove_from_context(event, __get_cpu_context(ctx), + ctx, (void *)flags); raw_spin_unlock_irq(>lock); + return; } + raw_spin_unlock_irq(>lock); + + event_function_call(event, __perf_remove_from_context, (void *)flags); } /* @@ -12377,14 +12396,17 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) } EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); -static void sync_child_event(struct perf_event *child_event, -
[tip: locking/core] static_call: Relax static_call_update() function argument type
The following commit has been merged into the locking/core branch of tip: Commit-ID: 9432bbd969c667fc9c4b1c140c5a745ff2a7b540 Gitweb: https://git.kernel.org/tip/9432bbd969c667fc9c4b1c140c5a745ff2a7b540 Author:Peter Zijlstra AuthorDate:Tue, 23 Mar 2021 16:49:03 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 09 Apr 2021 13:22:12 +02:00 static_call: Relax static_call_update() function argument type static_call_update() had stronger type requirements than regular C, relax them to match. Instead of requiring the @func argument has the exact matching type, allow any type which C is willing to promote to the right (function) pointer type. Specifically this allows (void *) arguments. This cleans up a bunch of static_call_update() callers for PREEMPT_DYNAMIC and should get around silly GCC11 warnings for free. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/yfon7ncl8ofgt...@hirez.programming.kicks-ass.net --- include/linux/static_call.h | 4 ++-- kernel/sched/core.c | 18 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 85ecc78..8d50f62 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -113,9 +113,9 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #define static_call_update(name, func) \ ({ \ - BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name))); \ + typeof(_CALL_TRAMP(name)) __F = (func); \ __static_call_update(_CALL_KEY(name),\ -STATIC_CALL_TRAMP_ADDR(name), func); \ +STATIC_CALL_TRAMP_ADDR(name), __F);\ }) #ifdef CONFIG_HAVE_STATIC_CALL_INLINE diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9819121..67f9890 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5396,25 +5396,25 @@ static void sched_dynamic_update(int mode) switch (mode) { case preempt_dynamic_none: static_call_update(cond_resched, __cond_resched); - static_call_update(might_resched, 
(typeof(&__cond_resched)) __static_call_return0); - static_call_update(preempt_schedule, (typeof(_schedule)) NULL); - static_call_update(preempt_schedule_notrace, (typeof(_schedule_notrace)) NULL); - static_call_update(irqentry_exit_cond_resched, (typeof(_exit_cond_resched)) NULL); + static_call_update(might_resched, (void *)&__static_call_return0); + static_call_update(preempt_schedule, NULL); + static_call_update(preempt_schedule_notrace, NULL); + static_call_update(irqentry_exit_cond_resched, NULL); pr_info("Dynamic Preempt: none\n"); break; case preempt_dynamic_voluntary: static_call_update(cond_resched, __cond_resched); static_call_update(might_resched, __cond_resched); - static_call_update(preempt_schedule, (typeof(_schedule)) NULL); - static_call_update(preempt_schedule_notrace, (typeof(_schedule_notrace)) NULL); - static_call_update(irqentry_exit_cond_resched, (typeof(_exit_cond_resched)) NULL); + static_call_update(preempt_schedule, NULL); + static_call_update(preempt_schedule_notrace, NULL); + static_call_update(irqentry_exit_cond_resched, NULL); pr_info("Dynamic Preempt: voluntary\n"); break; case preempt_dynamic_full: - static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); - static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); + static_call_update(cond_resched, (void *)&__static_call_return0); + static_call_update(might_resched, (void *)&__static_call_return0); static_call_update(preempt_schedule, __preempt_schedule_func); static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);
[tip: x86/core] x86/cpu: Resort and comment Intel models
The following commit has been merged into the x86/core branch of tip: Commit-ID: 53375a5a218e7ea0ac18087946b5391f749b764f Gitweb: https://git.kernel.org/tip/53375a5a218e7ea0ac18087946b5391f749b764f Author:Peter Zijlstra AuthorDate:Mon, 15 Mar 2021 17:12:53 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 08 Apr 2021 14:22:10 +02:00 x86/cpu: Resort and comment Intel models The INTEL_FAM6 list has become a mess again. Try and bring some sanity back into it. Where previously we had one microarch per year and a number of SKUs within that, this no longer seems to be the case. We now get different uarch names that share a 'core' design. Add the core name starting at skylake and reorder to keep the cores in chronological order. Furthermore, Intel marketed the names {Amber, Coffee, Whiskey} Lake, but those are in fact steppings of Kaby Lake, add comments for them. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/ye+hhs8i0gshh...@hirez.programming.kicks-ass.net --- arch/x86/include/asm/intel-family.h | 50 +++- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 9abe842..b15262f 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -32,7 +32,9 @@ * _EP - 2 socket server parts * _EX - 4+ socket server parts * - * The #define line may optionally include a comment including platform names. + * The #define line may optionally include a comment including platform or core + * names. 
An exception is made for kabylake where steppings seem to have gotten + * their own names :-( */ /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ @@ -69,35 +71,39 @@ #define INTEL_FAM6_BROADWELL_X 0x4F #define INTEL_FAM6_BROADWELL_D 0x56 -#define INTEL_FAM6_SKYLAKE_L 0x4E -#define INTEL_FAM6_SKYLAKE 0x5E -#define INTEL_FAM6_SKYLAKE_X 0x55 -#define INTEL_FAM6_KABYLAKE_L 0x8E -#define INTEL_FAM6_KABYLAKE0x9E +#define INTEL_FAM6_SKYLAKE_L 0x4E/* Sky Lake */ +#define INTEL_FAM6_SKYLAKE 0x5E/* Sky Lake */ +#define INTEL_FAM6_SKYLAKE_X 0x55/* Sky Lake */ -#define INTEL_FAM6_CANNONLAKE_L0x66 +#define INTEL_FAM6_KABYLAKE_L 0x8E/* Sky Lake */ +/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ +/* COFFEELAKE_L0x8E Sky Lake -- s: 10 */ +/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ -#define INTEL_FAM6_ICELAKE_X 0x6A -#define INTEL_FAM6_ICELAKE_D 0x6C -#define INTEL_FAM6_ICELAKE 0x7D -#define INTEL_FAM6_ICELAKE_L 0x7E -#define INTEL_FAM6_ICELAKE_NNPI0x9D +#define INTEL_FAM6_KABYLAKE0x9E/* Sky Lake */ +/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ -#define INTEL_FAM6_TIGERLAKE_L 0x8C -#define INTEL_FAM6_TIGERLAKE 0x8D +#define INTEL_FAM6_COMETLAKE 0xA5/* Sky Lake */ +#define INTEL_FAM6_COMETLAKE_L 0xA6/* Sky Lake */ -#define INTEL_FAM6_COMETLAKE 0xA5 -#define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_CANNONLAKE_L0x66/* Palm Cove */ -#define INTEL_FAM6_ROCKETLAKE 0xA7 +#define INTEL_FAM6_ICELAKE_X 0x6A/* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_D 0x6C/* Sunny Cove */ +#define INTEL_FAM6_ICELAKE 0x7D/* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_L 0x7E/* Sunny Cove */ +#define INTEL_FAM6_ICELAKE_NNPI0x9D/* Sunny Cove */ -#define INTEL_FAM6_SAPPHIRERAPIDS_X0x8F +#define INTEL_FAM6_LAKEFIELD 0x8A/* Sunny Cove / Tremont */ -/* Hybrid Core/Atom Processors */ +#define INTEL_FAM6_ROCKETLAKE 0xA7/* Cypress Cove */ -#defineINTEL_FAM6_LAKEFIELD0x8A -#define INTEL_FAM6_ALDERLAKE 0x97 -#define INTEL_FAM6_ALDERLAKE_L 0x9A +#define INTEL_FAM6_TIGERLAKE_L 0x8C/* Willow Cove */ 
+#define INTEL_FAM6_TIGERLAKE 0x8D/* Willow Cove */ +#define INTEL_FAM6_SAPPHIRERAPIDS_X0x8F/* Willow Cove */ + +#define INTEL_FAM6_ALDERLAKE 0x97/* Golden Cove / Gracemont */ +#define INTEL_FAM6_ALDERLAKE_L 0x9A/* Golden Cove / Gracemont */ /* "Small Core" Processors (Atom) */
[tip: x86/core] x86/retpoline: Simplify retpolines
The following commit has been merged into the x86/core branch of tip: Commit-ID: 119251855f9adf9421cb5eb409933092141ab2c7 Gitweb: https://git.kernel.org/tip/119251855f9adf9421cb5eb409933092141ab2c7 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:02 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:42:04 +02:00 x86/retpoline: Simplify retpolines Due to: c9c324dc22aa ("objtool: Support stack layout changes in alternatives") it is now possible to simplify the retpolines. Currently our retpolines consist of 2 symbols: - __x86_indirect_thunk_\reg: the compiler target - __x86_retpoline_\reg: the actual retpoline. Both are consecutive in code and aligned such that for any one register they both live in the same cacheline: <__x86_indirect_thunk_rax>: 0: ff e0 jmpq *%rax 2: 90 nop 3: 90 nop 4: 90 nop 0005 <__x86_retpoline_rax>: 5: e8 07 00 00 00 callq 11 <__x86_retpoline_rax+0xc> a: f3 90 pause c: 0f ae e8lfence f: eb f9 jmpa <__x86_retpoline_rax+0x5> 11: 48 89 04 24 mov%rax,(%rsp) 15: c3 retq 16: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1) The thunk is an alternative_2, where one option is a JMP to the retpoline. This was done so that objtool didn't need to deal with alternatives with stack ops. But that problem has been solved, so now it is possible to fold the entire retpoline into the alternative to simplify and consolidate unused bytes: <__x86_indirect_thunk_rax>: 0: ff e0 jmpq *%rax 2: 90 nop 3: 90 nop 4: 90 nop 5: 90 nop 6: 90 nop 7: 90 nop 8: 90 nop 9: 90 nop a: 90 nop b: 90 nop c: 90 nop d: 90 nop e: 90 nop f: 90 nop 10: 90 nop 11: 66 66 2e 0f 1f 84 00 00 00 00 00data16 nopw %cs:0x0(%rax,%rax,1) 1c: 0f 1f 40 00 nopl 0x0(%rax) Notice that since the longest alternative sequence is now: 0: e8 07 00 00 00 callq c <.altinstr_replacement+0xc> 5: f3 90 pause 7: 0f ae e8lfence a: eb f9 jmp5 <.altinstr_replacement+0x5> c: 48 89 04 24 mov%rax,(%rsp) 10: c3 retq 17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. 
(IOW, if we can shrink the retpoline by 1 byte we can pack it more densely). [ bp: Massage commit message. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20210326151259.506071...@infradead.org --- arch/x86/include/asm/asm-prototypes.h | 7 +- arch/x86/include/asm/nospec-branch.h | 6 ++--- arch/x86/lib/retpoline.S | 34 +- tools/objtool/check.c | 3 +-- 4 files changed, 21 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 51e2bf2..0545b07 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void); #define DECL_INDIRECT_THUNK(reg) \ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); -#define DECL_RETPOLINE(reg) \ - extern asmlinkage void __x86_retpoline_ ## reg (void); - #undef GEN #define GEN(reg) DECL_INDIRECT_THUNK(reg) #include -#undef GEN -#define GEN(reg) DECL_RETPOLINE(reg) -#include - #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 529f8e9..664be73 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -80,7 +80,7 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD #else jmp *%\reg @@ -90,7 +90,7 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ - __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ +
[tip: x86/core] x86/alternatives: Optimize optimize_nops()
The following commit has been merged into the x86/core branch of tip: Commit-ID: 23c1ad538f4f371bdb67d8a112314842d5db7e5a Gitweb: https://git.kernel.org/tip/23c1ad538f4f371bdb67d8a112314842d5db7e5a Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:01 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:41:17 +02:00 x86/alternatives: Optimize optimize_nops() Currently, optimize_nops() scans to see if the alternative starts with NOPs. However, the emit pattern is: 141: \oldinstr 142: .skip (len-(142b-141b)), 0x90 That is, when 'oldinstr' is short, the tail is padded with NOPs. This case never gets optimized. Rewrite optimize_nops() to replace any trailing string of NOPs inside the alternative to larger NOPs. Also run it irrespective of patching, replacing NOPs in both the original and replaced code. A direct consequence is that 'padlen' becomes superfluous, so remove it. [ bp: - Adjust commit message - remove a stale comment about needing to pad - add a comment in optimize_nops() - exit early if the NOP verif. 
loop catches a mismatch - function should not add NOPs in that case - fix the "optimized NOPs" offsets output ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20210326151259.442992...@infradead.org --- arch/x86/include/asm/alternative.h| 17 +- arch/x86/kernel/alternative.c | 49 +++--- tools/objtool/arch/x86/include/arch/special.h | 2 +- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 17b3609..a3c2315 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -65,7 +65,6 @@ struct alt_instr { u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction */ - u8 padlen; /* length of build-time padding */ } __packed; /* @@ -104,7 +103,6 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alt_end_marker "663" #define alt_slen "662b-661b" -#define alt_pad_lenalt_end_marker"b-662b" #define alt_total_slen alt_end_marker"b-661b" #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" @@ -151,8 +149,7 @@ static inline int alternatives_text_reserved(void *start, void *end) " .long " b_replacement(num)"f - .\n" /* new instruction */ \ " .word " __stringify(feature) "\n" /* feature bit */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n"/* replacement len */ \ - " .byte " alt_pad_len "\n" /* pad len */ + " .byte " alt_rlen(num) "\n"/* replacement len */ #define ALTINSTR_REPLACEMENT(newinstr, num)/* replacement */ \ "# ALT: replacement " #num "\n" \ @@ -224,9 +221,6 @@ static inline int alternatives_text_reserved(void *start, void *end) * Peculiarities: * No memory clobber here. * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ... 
"r") - * If you use variable sized constraints like "m" or "g" in the - * replacement make sure to pad to the worst case length. * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ @@ -315,13 +309,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * enough information for the alternatives patching code to patch an * instruction. See apply_alternatives(). */ -.macro altinstruction_entry orig alt feature orig_len alt_len pad_len +.macro altinstruction_entry orig alt feature orig_len alt_len .long \orig - . .long \alt - . .word \feature .byte \orig_len .byte \alt_len - .byte \pad_len .endm /* @@ -338,7 +331,7 @@ static inline int alternatives_text_reserved(void *start, void *end) 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b + altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f .popsection .pushsection .altinstr_replacement,"ax" @@ -375,8 +368,8 @@ static inline int alternatives_text_reserved(void *start, void *end) 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f +
[tip: x86/core] objtool: Handle per arch retpoline naming
The following commit has been merged into the x86/core branch of tip: Commit-ID: 530b4ddd9dd92b263081f5c7786d39a8129c8b2d Gitweb: https://git.kernel.org/tip/530b4ddd9dd92b263081f5c7786d39a8129c8b2d Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:04 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:43:02 +02:00 objtool: Handle per arch retpoline naming The __x86_indirect_ naming is obviously not generic. Shorten to allow matching some additional magic names later. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.630296...@infradead.org --- tools/objtool/arch/x86/decode.c | 5 + tools/objtool/check.c| 9 +++-- tools/objtool/include/objtool/arch.h | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index ba9ebff..782894e 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -648,3 +648,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) return 0; } + +bool arch_is_retpoline(struct symbol *sym) +{ + return !strncmp(sym->name, "__x86_indirect_", 15); +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 519af4b..6fbc001 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -850,6 +850,11 @@ static int add_ignore_alternatives(struct objtool_file *file) return 0; } +__weak bool arch_is_retpoline(struct symbol *sym) +{ + return false; +} + /* * Find the destination instructions for all jumps. 
*/ @@ -872,7 +877,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { + } else if (arch_is_retpoline(reloc->sym)) { /* * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. @@ -1026,7 +1031,7 @@ static int add_call_destinations(struct objtool_file *file) return -1; } - } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { + } else if (arch_is_retpoline(reloc->sym)) { /* * Retpoline calls are really dynamic calls in * disguise, so convert them accordingly. diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 6ff0685..bb30993 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -86,4 +86,6 @@ const char *arch_nop_insn(int len); int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); +bool arch_is_retpoline(struct symbol *sym); + #endif /* _ARCH_H */
[tip: x86/core] objtool: Correctly handle retpoline thunk calls
The following commit has been merged into the x86/core branch of tip: Commit-ID: bcb1b6ff39da7e8a6a986eb08126fba2b5e13c32 Gitweb: https://git.kernel.org/tip/bcb1b6ff39da7e8a6a986eb08126fba2b5e13c32 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:03 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:42:54 +02:00 objtool: Correctly handle retpoline thunk calls Just like JMP handling, convert a direct CALL to a retpoline thunk into a retpoline safe indirect CALL. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.567568...@infradead.org --- tools/objtool/check.c | 12 1 file changed, 12 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d45f018..519af4b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1025,6 +1025,18 @@ static int add_call_destinations(struct objtool_file *file) dest_off); return -1; } + + } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { + /* +* Retpoline calls are really dynamic calls in +* disguise, so convert them accordingly. +*/ + insn->type = INSN_CALL_DYNAMIC; + insn->retpoline_safe = true; + + remove_insn_ops(insn); + continue; + } else insn->call_dest = reloc->sym;
[tip: x86/core] objtool: Rework the elf_rebuild_reloc_section() logic
The following commit has been merged into the x86/core branch of tip: Commit-ID: 3a647607b57ad8346e659ddd3b951ac292c83690 Gitweb: https://git.kernel.org/tip/3a647607b57ad8346e659ddd3b951ac292c83690 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:06 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:43:32 +02:00 objtool: Rework the elf_rebuild_reloc_section() logic Instead of manually calling elf_rebuild_reloc_section() on sections we've called elf_add_reloc() on, have elf_write() DTRT. This makes it easier to add random relocations in places without carefully tracking when we're done and need to flush what section. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.754213...@infradead.org --- tools/objtool/check.c | 6 -- tools/objtool/elf.c | 20 ++-- tools/objtool/include/objtool/elf.h | 1 - tools/objtool/orc_gen.c | 3 --- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8618d03..1d0415b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -542,9 +542,6 @@ static int create_static_call_sections(struct objtool_file *file) idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; - return 0; } @@ -614,9 +611,6 @@ static int create_mcount_loc_sections(struct objtool_file *file) idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; - return 0; } diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 93fa833..374813e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, struct reloc *reloc) list_add_tail(>list, >reloc_list); elf_hash_add(elf->reloc_hash, >hash, reloc_hash(reloc)); + + sec->changed = true; } static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx) @@ -558,7 +560,9 @@ static int 
read_relocs(struct elf *elf) return -1; } - elf_add_reloc(elf, reloc); + list_add_tail(>list, >reloc_list); + elf_hash_add(elf->reloc_hash, >hash, reloc_hash(reloc)); + nr_reloc++; } max_reloc = max(max_reloc, nr_reloc); @@ -873,14 +877,11 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) return 0; } -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) +static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) { struct reloc *reloc; int nr; - sec->changed = true; - elf->changed = true; - nr = 0; list_for_each_entry(reloc, >reloc_list, list) nr++; @@ -944,9 +945,15 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; - /* Update section headers for changed sections: */ + /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, >sections, list) { if (sec->changed) { + if (sec->base && + elf_rebuild_reloc_section(elf, sec)) { + WARN("elf_rebuild_reloc_section"); + return -1; + } + s = elf_getscn(elf->elf, sec->idx); if (!s) { WARN_ELF("elf_getscn"); @@ -958,6 +965,7 @@ int elf_write(struct elf *elf) } sec->changed = false; + elf->changed = true; } } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index e6890cc..fc576ed 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -142,7 +142,6 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se struct symbol *find_func_containing(struct section *sec, unsigned long offset); void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, struct reloc *reloc); -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); #define for_each_sec(file, sec) \ list_for_each_entry(sec, >elf->sections, list) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 738aa50..f534708 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -254,8 +254,5 @@ int orc_create(struct objtool_file 
*file) return -1;
[tip: x86/core] objtool: Fix static_call list generation
The following commit has been merged into the x86/core branch of tip: Commit-ID: a958c4fea768d2c378c89032ab41d38da2a24422 Gitweb: https://git.kernel.org/tip/a958c4fea768d2c378c89032ab41d38da2a24422 Author: Peter Zijlstra AuthorDate: Fri, 26 Mar 2021 16:12:05 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:43:19 +02:00 objtool: Fix static_call list generation Currently, objtool generates tail call entries in add_jump_destination() but waits until validate_branch() to generate the regular call entries. Move these to add_call_destination() for consistency. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.691529...@infradead.org --- tools/objtool/check.c | 17 - 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6fbc001..8618d03 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1045,6 +1045,11 @@ static int add_call_destinations(struct objtool_file *file) } else insn->call_dest = reloc->sym; + if (insn->call_dest && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + /* * Many compilers cannot disable KCOV with a function attribute * so they need a little help, NOP out any KCOV calls from noinstr @@ -1788,6 +1793,9 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* +* Must be before add_{jump_call}_destination. +*/ ret = read_static_call_tramps(file); if (ret) return ret; @@ -1800,6 +1808,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* +* Must be before add_call_destination(); it changes INSN_CALL to +* INSN_JUMP.
+*/ ret = read_intra_function_calls(file); if (ret) return ret; @@ -2762,11 +2774,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; - if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, - &file->static_call_list); - } - break; case INSN_JUMP_CONDITIONAL:
[tip: x86/core] objtool: Create reloc sections implicitly
The following commit has been merged into the x86/core branch of tip: Commit-ID: d0c5c4cc73da0b05b0d9e5f833f2d859e1b45f8e Gitweb: https://git.kernel.org/tip/d0c5c4cc73da0b05b0d9e5f833f2d859e1b45f8e Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:08 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:44:37 +02:00 objtool: Create reloc sections implicitly Have elf_add_reloc() create the relocation section implicitly. Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.880174...@infradead.org --- tools/objtool/check.c | 6 -- tools/objtool/elf.c | 9 - tools/objtool/include/objtool/elf.h | 1 - tools/objtool/orc_gen.c | 2 -- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 61fe29a..600fa67 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -459,9 +459,6 @@ static int create_static_call_sections(struct objtool_file *file) if (!sec) return -1; - if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) - return -1; - idx = 0; list_for_each_entry(insn, >static_call_list, static_call_node) { @@ -547,9 +544,6 @@ static int create_mcount_loc_sections(struct objtool_file *file) if (!sec) return -1; - if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) - return -1; - idx = 0; list_for_each_entry(insn, >mcount_loc_list, mcount_loc_node) { diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0ab52ac..7b65ae3 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -447,11 +447,18 @@ err: return -1; } +static struct section *elf_create_reloc_section(struct elf *elf, + struct section *base, + int reltype); + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct symbol *sym, int addend) { struct reloc *reloc; + if (!sec->reloc && !elf_create_reloc_section(elf, sec, 
SHT_RELA)) + return -1; + reloc = malloc(sizeof(*reloc)); if (!reloc) { perror("malloc"); @@ -829,7 +836,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec return sec; } -struct section *elf_create_reloc_section(struct elf *elf, +static struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 825ad32..463f329 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -122,7 +122,6 @@ static inline u32 reloc_hash(struct reloc *reloc) struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); -struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct symbol *sym, int addend); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 1b57be6..dc9b7dd 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -225,8 +225,6 @@ int orc_create(struct objtool_file *file) sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); if (!sec) return -1; - if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) - return -1; /* Write ORC entries to sections: */ list_for_each_entry(entry, _list, list) {
[tip: x86/core] objtool: Add elf_create_reloc() helper
The following commit has been merged into the x86/core branch of tip: Commit-ID: ef47cc01cb4abcd760d8ac66b9361d6ade4d0846 Gitweb: https://git.kernel.org/tip/ef47cc01cb4abcd760d8ac66b9361d6ade4d0846 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:07 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:44:18 +02:00 objtool: Add elf_create_reloc() helper We have 4 instances of adding a relocation. Create a common helper to avoid growing even more. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.817438...@infradead.org --- tools/objtool/check.c | 78 + tools/objtool/elf.c | 86 ++-- tools/objtool/include/objtool/elf.h | 10 ++- tools/objtool/orc_gen.c | 30 ++ 4 files changed, 85 insertions(+), 119 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 1d0415b..61fe29a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -433,8 +433,7 @@ reachable: static int create_static_call_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; struct static_call_site *site; struct instruction *insn; struct symbol *key_sym; @@ -460,8 +459,7 @@ static int create_static_call_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) + if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) return -1; idx = 0; @@ -471,25 +469,11 @@ static int create_static_call_sections(struct objtool_file *file) memset(site, 0, sizeof(struct static_call_site)); /* populate reloc for 'addr' */ - reloc = malloc(sizeof(*reloc)); - - if (!reloc) { - perror("malloc"); - return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); - if (!reloc->sym) { - WARN_FUNC("static call tramp: missing containing symbol", - insn->sec, insn->offset); + 
if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(struct static_call_site), + R_X86_64_PC32, + insn->sec, insn->offset)) return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site); - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); /* find key symbol */ key_name = strdup(insn->call_dest->name); @@ -526,18 +510,11 @@ static int create_static_call_sections(struct objtool_file *file) free(key_name); /* populate reloc for 'key' */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc(file->elf, sec, + idx * sizeof(struct static_call_site) + 4, + R_X86_64_PC32, key_sym, + is_sibling_call(insn) * STATIC_CALL_SITE_TAIL)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - reloc->sym = key_sym; - reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site) + 4; - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); idx++; } @@ -547,8 +524,7 @@ static int create_static_call_sections(struct objtool_file *file) static int create_mcount_loc_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; unsigned long *loc; struct instruction *insn; int idx; @@ -571,8 +547,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) + if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) return -1; idx = 0; @@ -581,32 +556,11 @@ static int create_mcount_loc_sections(struct objtool_file *file) loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned
[tip: x86/core] objtool: Extract elf_strtab_concat()
The following commit has been merged into the x86/core branch of tip: Commit-ID: 417a4dc91e559f92404c2544f785b02ce75784c3 Gitweb: https://git.kernel.org/tip/417a4dc91e559f92404c2544f785b02ce75784c3 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:09 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:44:56 +02:00 objtool: Extract elf_strtab_concat() Create a common helper to append strings to a strtab. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.941474...@infradead.org --- tools/objtool/elf.c | 60 +++- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 7b65ae3..c278a04 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -673,13 +673,48 @@ err: return NULL; } +static int elf_add_string(struct elf *elf, struct section *strtab, char *str) +{ + Elf_Data *data; + Elf_Scn *s; + int len; + + if (!strtab) + strtab = find_section_by_name(elf, ".strtab"); + if (!strtab) { + WARN("can't find .strtab section"); + return -1; + } + + s = elf_getscn(elf->elf, strtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return -1; + } + + data->d_buf = str; + data->d_size = strlen(str) + 1; + data->d_align = 1; + + len = strtab->len; + strtab->len += data->d_size; + strtab->changed = true; + + return len; +} + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; Elf_Scn *s; - Elf_Data *data; sec = malloc(sizeof(*sec)); if (!sec) { @@ -736,7 +771,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_addralign = 1; sec->sh.sh_flags = SHF_ALLOC | sh_flags; - /* Add section name to .shstrtab (or .strtab for Clang) */ shstrtab = 
find_section_by_name(elf, ".shstrtab"); if (!shstrtab) @@ -745,27 +779,9 @@ struct section *elf_create_section(struct elf *elf, const char *name, WARN("can't find .shstrtab or .strtab section"); return NULL; } - - s = elf_getscn(elf->elf, shstrtab->idx); - if (!s) { - WARN_ELF("elf_getscn"); + sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name); + if (sec->sh.sh_name == -1) return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return NULL; - } - - data->d_buf = sec->name; - data->d_size = strlen(name) + 1; - data->d_align = 1; - - sec->sh.sh_name = shstrtab->len; - - shstrtab->len += strlen(name) + 1; - shstrtab->changed = true; list_add_tail(>list, >sections); elf_hash_add(elf->section_hash, >hash, sec->idx);
[tip: x86/core] objtool: Extract elf_symbol_add()
The following commit has been merged into the x86/core branch of tip: Commit-ID: 9a7827b7789c630c1efdb121daa42c6e77dce97f Gitweb: https://git.kernel.org/tip/9a7827b7789c630c1efdb121daa42c6e77dce97f Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:10 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:45:01 +02:00 objtool: Extract elf_symbol_add() Create a common helper to add symbols. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.003468...@infradead.org --- tools/objtool/elf.c | 56 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c278a04..8457218 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -290,12 +290,39 @@ static int read_sections(struct elf *elf) return 0; } +static void elf_add_symbol(struct elf *elf, struct symbol *sym) +{ + struct list_head *entry; + struct rb_node *pnode; + + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + + sym->offset = sym->sym.st_value; + sym->len = sym->sym.st_size; + + rb_add(>node, >sec->symbol_tree, symbol_to_offset); + pnode = rb_prev(>node); + if (pnode) + entry = _entry(pnode, struct symbol, node)->list; + else + entry = >sec->symbol_list; + list_add(>list, entry); + elf_hash_add(elf->symbol_hash, >hash, sym->idx); + elf_hash_add(elf->symbol_name_hash, >name_hash, str_hash(sym->name)); + + /* +* Don't store empty STT_NOTYPE symbols in the rbtree. They +* can exist within a function, confusing the sorting. 
+*/ + if (!sym->len) + rb_erase(>node, >sec->symbol_tree); +} + static int read_symbols(struct elf *elf) { struct section *symtab, *symtab_shndx, *sec; struct symbol *sym, *pfunc; - struct list_head *entry; - struct rb_node *pnode; int symbols_nr, i; char *coldstr; Elf_Data *shndx_data = NULL; @@ -340,9 +367,6 @@ static int read_symbols(struct elf *elf) goto err; } - sym->type = GELF_ST_TYPE(sym->sym.st_info); - sym->bind = GELF_ST_BIND(sym->sym.st_info); - if ((sym->sym.st_shndx > SHN_UNDEF && sym->sym.st_shndx < SHN_LORESERVE) || (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) { @@ -355,32 +379,14 @@ static int read_symbols(struct elf *elf) sym->name); goto err; } - if (sym->type == STT_SECTION) { + if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) { sym->name = sym->sec->name; sym->sec->sym = sym; } } else sym->sec = find_section_by_index(elf, 0); - sym->offset = sym->sym.st_value; - sym->len = sym->sym.st_size; - - rb_add(>node, >sec->symbol_tree, symbol_to_offset); - pnode = rb_prev(>node); - if (pnode) - entry = _entry(pnode, struct symbol, node)->list; - else - entry = >sec->symbol_list; - list_add(>list, entry); - elf_hash_add(elf->symbol_hash, >hash, sym->idx); - elf_hash_add(elf->symbol_name_hash, >name_hash, str_hash(sym->name)); - - /* -* Don't store empty STT_NOTYPE symbols in the rbtree. They -* can exist within a function, confusing the sorting. -*/ - if (!sym->len) - rb_erase(>node, >sec->symbol_tree); + elf_add_symbol(elf, sym); } if (stats)
[tip: x86/core] objtool/x86: Rewrite retpoline thunk calls
The following commit has been merged into the x86/core branch of tip: Commit-ID: 9bc0bb50727c8ac69fbb33fb937431cf3518ff37 Gitweb: https://git.kernel.org/tip/9bc0bb50727c8ac69fbb33fb937431cf3518ff37 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:15 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:47:28 +02:00 objtool/x86: Rewrite retpoline thunk calls When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an indirect call, have objtool rewrite it to: ALTERNATIVE "call __x86_indirect_thunk_\reg", "call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE) Additionally, in order to not emit endless identical .altinst_replacement chunks, use a global symbol for them, see __x86_indirect_alt_*. This also avoids objtool from having to do code generation. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.320177...@infradead.org --- arch/x86/include/asm/asm-prototypes.h | 12 ++- arch/x86/lib/retpoline.S | 41 - tools/objtool/arch/x86/decode.c | 117 +- 3 files changed, 167 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 0545b07..4cb726c 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void); #ifdef CONFIG_RETPOLINE -#define DECL_INDIRECT_THUNK(reg) \ +#undef GEN +#define GEN(reg) \ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); +#include + +#undef GEN +#define GEN(reg) \ + extern asmlinkage void __x86_indirect_alt_call_ ## reg (void); +#include #undef GEN -#define GEN(reg) DECL_INDIRECT_THUNK(reg) +#define GEN(reg) \ + extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void); #include #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index d2c0d14..4d32cb0 100644 --- a/arch/x86/lib/retpoline.S +++ 
b/arch/x86/lib/retpoline.S @@ -10,6 +10,8 @@ #include #include + .section .text.__x86.indirect_thunk + .macro RETPOLINE reg ANNOTATE_INTRA_FUNCTION_CALL call.Ldo_rop_\@ @@ -25,9 +27,9 @@ .endm .macro THUNK reg - .section .text.__x86.indirect_thunk .align 32 + SYM_FUNC_START(__x86_indirect_thunk_\reg) ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ @@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) .endm /* + * This generates .altinstr_replacement symbols for use by objtool. They, + * however, must not actually live in .altinstr_replacement since that will be + * discarded after init, but module alternatives will also reference these + * symbols. + * + * Their names matches the "__x86_indirect_" prefix to mark them as retpolines. + */ +.macro ALT_THUNK reg + + .align 1 + +SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) + ANNOTATE_RETPOLINE_SAFE +1: call*%\reg +2: .skip 5-(2b-1b), 0x90 +SYM_FUNC_END(__x86_indirect_alt_call_\reg) + +SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) + ANNOTATE_RETPOLINE_SAFE +1: jmp *%\reg +2: .skip 5-(2b-1b), 0x90 +SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) + +.endm + +/* * Despite being an assembler file we can't just use .irp here * because __KSYM_DEPS__ only uses the C preprocessor and would * only see one instance of "__x86_indirect_thunk_\reg" rather @@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) #define GEN(reg) EXPORT_THUNK(reg) #include +#undef GEN +#define GEN(reg) ALT_THUNK reg +#include + +#undef GEN +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg) +#include + +#undef GEN +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg) +#include diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 782894e..7e8b5be 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -19,6 +19,7 @@ #include #include #include +#include static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, @@ -613,6 +614,122 @@ 
const char *arch_nop_insn(int len) return nops[len-1]; } +/* asm/alternative.h ? */ + +#define ALTINSTR_FLAG_INV (1 << 15) +#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset;/* offset to replacement instruction */ + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +static int elf_add_alternative(struct elf *elf, + struct instruction *orig, struct symbol *sym, +
[tip: x86/core] objtool: Keep track of retpoline call sites
The following commit has been merged into the x86/core branch of tip: Commit-ID: 43d5430ad74ef5156353af7aec352426ec7a8e57 Gitweb: https://git.kernel.org/tip/43d5430ad74ef5156353af7aec352426ec7a8e57 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:12 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:45:27 +02:00 objtool: Keep track of retpoline call sites Provide infrastructure for architectures to rewrite/augment compiler generated retpoline calls. Similar to what we do for static_call()s, keep track of the instructions that are retpoline calls. Use the same list_head, since a retpoline call cannot also be a static_call. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.130805...@infradead.org --- tools/objtool/check.c | 34 tools/objtool/include/objtool/arch.h| 2 +- tools/objtool/include/objtool/check.h | 2 +- tools/objtool/include/objtool/objtool.h | 1 +- tools/objtool/objtool.c | 1 +- 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 600fa67..77074db 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -451,7 +451,7 @@ static int create_static_call_sections(struct objtool_file *file) return 0; idx = 0; - list_for_each_entry(insn, >static_call_list, static_call_node) + list_for_each_entry(insn, >static_call_list, call_node) idx++; sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, @@ -460,7 +460,7 @@ static int create_static_call_sections(struct objtool_file *file) return -1; idx = 0; - list_for_each_entry(insn, >static_call_list, static_call_node) { + list_for_each_entry(insn, >static_call_list, call_node) { site = (struct static_call_site *)sec->data->d_buf + idx; memset(site, 0, sizeof(struct static_call_site)); @@ -829,13 +829,16 @@ static int add_jump_destinations(struct objtool_file *file) else insn->type = 
INSN_JUMP_DYNAMIC_CONDITIONAL; + list_add_tail(>call_node, + >retpoline_call_list); + insn->retpoline_safe = true; continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ insn->call_dest = reloc->sym; if (insn->call_dest->static_call_tramp) { - list_add_tail(>static_call_node, + list_add_tail(>call_node, >static_call_list); } continue; @@ -897,7 +900,7 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call (without reloc) */ insn->call_dest = insn->jump_dest->func; if (insn->call_dest->static_call_tramp) { - list_add_tail(>static_call_node, + list_add_tail(>call_node, >static_call_list); } } @@ -981,6 +984,9 @@ static int add_call_destinations(struct objtool_file *file) insn->type = INSN_CALL_DYNAMIC; insn->retpoline_safe = true; + list_add_tail(>call_node, + >retpoline_call_list); + remove_insn_ops(insn); continue; @@ -988,7 +994,7 @@ static int add_call_destinations(struct objtool_file *file) insn->call_dest = reloc->sym; if (insn->call_dest && insn->call_dest->static_call_tramp) { - list_add_tail(>static_call_node, + list_add_tail(>call_node, >static_call_list); } @@ -1714,6 +1720,11 @@ static void mark_rodata(struct objtool_file *file) file->rodata = found; } +__weak int arch_rewrite_retpolines(struct objtool_file *file) +{ + return 0; +} + static int decode_sections(struct objtool_file *file) { int ret; @@ -1742,6 +1753,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* +* Must be before add_special_section_alts() as that depends on +* jump_dest being set. +*/ ret = add_jump_destinations(file); if (ret) return ret; @@ -1778,6
[tip: x86/core] objtool: Cache instruction relocs
The following commit has been merged into the x86/core branch of tip: Commit-ID: 7bd2a600f3e9d27286bbf23c83d599e9cc7cf245 Gitweb: https://git.kernel.org/tip/7bd2a600f3e9d27286bbf23c83d599e9cc7cf245 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:13 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:46:15 +02:00 objtool: Cache instruction relocs Track the reloc of instructions in the new instruction->reloc field to avoid having to look them up again later. ( Technically x86 instructions can have two relocations, but not jumps and calls, for which we're using this. ) Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.195441...@infradead.org --- tools/objtool/check.c | 28 -- tools/objtool/include/objtool/check.h | 1 +- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 77074db..1f4154f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -797,6 +797,25 @@ __weak bool arch_is_retpoline(struct symbol *sym) return false; } +#define NEGATIVE_RELOC ((void *)-1L) + +static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +{ + if (insn->reloc == NEGATIVE_RELOC) + return NULL; + + if (!insn->reloc) { + insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (!insn->reloc) { + insn->reloc = NEGATIVE_RELOC; + return NULL; + } + } + + return insn->reloc; +} + /* * Find the destination instructions for all jumps. 
*/ @@ -811,8 +830,7 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, -insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_sec = insn->sec; dest_off = arch_jump_destination(insn); @@ -944,8 +962,7 @@ static int add_call_destinations(struct objtool_file *file) if (insn->type != INSN_CALL) continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_off = arch_jump_destination(insn); insn->call_dest = find_call_destination(insn->sec, dest_off); @@ -1144,8 +1161,7 @@ static int handle_group_alt(struct objtool_file *file, * alternatives code can adjust the relative offsets * accordingly. */ - alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + alt_reloc = insn_reloc(file, insn); if (alt_reloc && !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index e5528ce..56d50bc 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -56,6 +56,7 @@ struct instruction { struct instruction *jump_dest; struct instruction *first_jump_src; struct reloc *jump_table; + struct reloc *reloc; struct list_head alts; struct symbol *func; struct list_head stack_ops;
[tip: x86/core] objtool: Add elf_create_undef_symbol()
The following commit has been merged into the x86/core branch of tip: Commit-ID: 2f2f7e47f0525cbaad5dd9675fd9d8aa8da12046 Gitweb: https://git.kernel.org/tip/2f2f7e47f0525cbaad5dd9675fd9d8aa8da12046 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:11 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:45:05 +02:00 objtool: Add elf_create_undef_symbol() Allow objtool to create undefined symbols; this allows creating relocations to symbols not currently in the symbol table. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.064743...@infradead.org --- tools/objtool/elf.c | 60 - tools/objtool/include/objtool/elf.h | 1 +- 2 files changed, 61 insertions(+) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 8457218..d08f5f3 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -715,6 +715,66 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) return len; } +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) +{ + struct section *symtab; + struct symbol *sym; + Elf_Data *data; + Elf_Scn *s; + + sym = malloc(sizeof(*sym)); + if (!sym) { + perror("malloc"); + return NULL; + } + memset(sym, 0, sizeof(*sym)); + + sym->name = strdup(name); + + sym->sym.st_name = elf_add_string(elf, NULL, sym->name); + if (sym->sym.st_name == -1) + return NULL; + + sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); + // st_other 0 + // st_shndx 0 + // st_value 0 + // st_size 0 + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + WARN("can't find .symtab"); + return NULL; + } + + s = elf_getscn(elf->elf, symtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return NULL; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return NULL; + } + + data->d_buf = >sym; + data->d_size = sizeof(sym->sym); + data->d_align = 1; + + sym->idx = symtab->len / 
sizeof(sym->sym); + + symtab->len += data->d_size; + symtab->changed = true; + + sym->sec = find_section_by_index(elf, 0); + + elf_add_symbol(elf, sym); + + return sym; +} + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 463f329..45e5ede 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -133,6 +133,7 @@ int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); int elf_write_reloc(struct elf *elf, struct reloc *reloc); +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); int elf_write(struct elf *elf); void elf_close(struct elf *elf);
[tip: x86/core] objtool: Skip magical retpoline .altinstr_replacement
The following commit has been merged into the x86/core branch of tip: Commit-ID: 50e7b4a1a1b264fc7df0698f2defb93cadf19a7b Gitweb: https://git.kernel.org/tip/50e7b4a1a1b264fc7df0698f2defb93cadf19a7b Author: Peter Zijlstra AuthorDate: Fri, 26 Mar 2021 16:12:14 +01:00 Committer: Ingo Molnar CommitterDate: Fri, 02 Apr 2021 12:46:57 +02:00 objtool: Skip magical retpoline .altinstr_replacement When the .altinstr_replacement is a retpoline, skip the alternative. We already special-case retpolines anyway. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.259429...@infradead.org --- tools/objtool/special.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 2c7fbda..07b21cf 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, return -1; } + /* +* Skip retpoline .altinstr_replacement... we already rewrite the +* instructions for retpolines anyway, see arch_is_retpoline() +* usage in add_{call,jump}_destinations(). +*/ + if (arch_is_retpoline(new_reloc->sym)) + return 1; + alt->new_sec = new_reloc->sym->sec; alt->new_off = (unsigned int)new_reloc->addend; @@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, struct list_head *alts) memset(alt, 0, sizeof(*alt)); ret = get_alt_entry(elf, entry, sec, idx, alt); - if (ret) + if (ret > 0) + continue; + if (ret < 0) return ret; list_add_tail(&alt->list, alts);
[tip: x86/core] objtool: Fix static_call list generation
The following commit has been merged into the x86/core branch of tip: Commit-ID: b62b63571e4be0ce31984ce83b04853f2cba678b Gitweb: https://git.kernel.org/tip/b62b63571e4be0ce31984ce83b04853f2cba678b Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:05 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 11:43:16 +02:00 objtool: Fix static_call list generation Currently, objtool generates tail call entries in add_jump_destination() but waits until validate_branch() to generate the regular call entries. Move these to add_call_destination() for consistency. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.691529...@infradead.org --- tools/objtool/check.c | 17 - 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6fbc001..8618d03 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1045,6 +1045,11 @@ static int add_call_destinations(struct objtool_file *file) } else insn->call_dest = reloc->sym; + if (insn->call_dest && insn->call_dest->static_call_tramp) { + list_add_tail(>static_call_node, + >static_call_list); + } + /* * Many compilers cannot disable KCOV with a function attribute * so they need a little help, NOP out any KCOV calls from noinstr @@ -1788,6 +1793,9 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* +* Must be before add_{jump_call}_destination. +*/ ret = read_static_call_tramps(file); if (ret) return ret; @@ -1800,6 +1808,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* +* Must be before add_call_destination(); it changes INSN_CALL to +* INSN_JUMP. 
+*/ ret = read_intra_function_calls(file); if (ret) return ret; @@ -2762,11 +2774,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; - if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) { - list_add_tail(>static_call_node, - >static_call_list); - } - break; case INSN_JUMP_CONDITIONAL:
[tip: x86/core] objtool: Cache instruction relocs
The following commit has been merged into the x86/core branch of tip: Commit-ID: 4ecdc0265dc911adba0772fd6e816d48da678fe7 Gitweb: https://git.kernel.org/tip/4ecdc0265dc911adba0772fd6e816d48da678fe7 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:13 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 13:25:38 +02:00 objtool: Cache instruction relocs Track the reloc of instructions to avoid having to look them up again later. (Technically x86 instructions can have two relocations, but not jumps and calls, for which we're using this.) Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.195441...@infradead.org --- tools/objtool/check.c | 28 -- tools/objtool/include/objtool/check.h | 1 +- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 77074db..1f4154f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -797,6 +797,25 @@ __weak bool arch_is_retpoline(struct symbol *sym) return false; } +#define NEGATIVE_RELOC ((void *)-1L) + +static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +{ + if (insn->reloc == NEGATIVE_RELOC) + return NULL; + + if (!insn->reloc) { + insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec, + insn->offset, insn->len); + if (!insn->reloc) { + insn->reloc = NEGATIVE_RELOC; + return NULL; + } + } + + return insn->reloc; +} + /* * Find the destination instructions for all jumps. 
*/ @@ -811,8 +830,7 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, -insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_sec = insn->sec; dest_off = arch_jump_destination(insn); @@ -944,8 +962,7 @@ static int add_call_destinations(struct objtool_file *file) if (insn->type != INSN_CALL) continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + reloc = insn_reloc(file, insn); if (!reloc) { dest_off = arch_jump_destination(insn); insn->call_dest = find_call_destination(insn->sec, dest_off); @@ -1144,8 +1161,7 @@ static int handle_group_alt(struct objtool_file *file, * alternatives code can adjust the relative offsets * accordingly. */ - alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + alt_reloc = insn_reloc(file, insn); if (alt_reloc && !arch_support_alt_relocation(special_alt, insn, alt_reloc)) { diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index e5528ce..56d50bc 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -56,6 +56,7 @@ struct instruction { struct instruction *jump_dest; struct instruction *first_jump_src; struct reloc *jump_table; + struct reloc *reloc; struct list_head alts; struct symbol *func; struct list_head stack_ops;
[tip: x86/core] objtool: Rework rebuild_reloc logic
The following commit has been merged into the x86/core branch of tip: Commit-ID: 98ce4d014ad4c1c4afcc427fc3f0002674315cb9 Gitweb: https://git.kernel.org/tip/98ce4d014ad4c1c4afcc427fc3f0002674315cb9 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:06 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 12:51:35 +02:00 objtool: Rework rebuild_reloc logic Instead of manually calling elf_rebuild_reloc_section() on sections we've called elf_add_reloc() on, have elf_write() DTRT. This makes it easier to add random relocations in places without carefully tracking when we're done and need to flush what section. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.754213...@infradead.org --- tools/objtool/check.c | 6 -- tools/objtool/elf.c | 20 ++-- tools/objtool/include/objtool/elf.h | 1 - tools/objtool/orc_gen.c | 3 --- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8618d03..1d0415b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -542,9 +542,6 @@ static int create_static_call_sections(struct objtool_file *file) idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; - return 0; } @@ -614,9 +611,6 @@ static int create_mcount_loc_sections(struct objtool_file *file) idx++; } - if (elf_rebuild_reloc_section(file->elf, reloc_sec)) - return -1; - return 0; } diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 93fa833..374813e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, struct reloc *reloc) list_add_tail(>list, >reloc_list); elf_hash_add(elf->reloc_hash, >hash, reloc_hash(reloc)); + + sec->changed = true; } static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx) @@ -558,7 +560,9 @@ static int read_relocs(struct elf *elf) return -1; } - 
elf_add_reloc(elf, reloc); + list_add_tail(>list, >reloc_list); + elf_hash_add(elf->reloc_hash, >hash, reloc_hash(reloc)); + nr_reloc++; } max_reloc = max(max_reloc, nr_reloc); @@ -873,14 +877,11 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) return 0; } -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) +static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) { struct reloc *reloc; int nr; - sec->changed = true; - elf->changed = true; - nr = 0; list_for_each_entry(reloc, >reloc_list, list) nr++; @@ -944,9 +945,15 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; - /* Update section headers for changed sections: */ + /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, >sections, list) { if (sec->changed) { + if (sec->base && + elf_rebuild_reloc_section(elf, sec)) { + WARN("elf_rebuild_reloc_section"); + return -1; + } + s = elf_getscn(elf->elf, sec->idx); if (!s) { WARN_ELF("elf_getscn"); @@ -958,6 +965,7 @@ int elf_write(struct elf *elf) } sec->changed = false; + elf->changed = true; } } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index e6890cc..fc576ed 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -142,7 +142,6 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se struct symbol *find_func_containing(struct section *sec, unsigned long offset); void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, struct reloc *reloc); -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); #define for_each_sec(file, sec) \ list_for_each_entry(sec, >elf->sections, list) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 738aa50..f534708 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -254,8 +254,5 @@ int orc_create(struct objtool_file *file) return -1; } - if
[tip: x86/core] x86: Add insn_decode_kernel()
The following commit has been merged into the x86/core branch of tip: Commit-ID: 52fa82c21f64e900a72437269a5cc9e0034b424e Gitweb: https://git.kernel.org/tip/52fa82c21f64e900a72437269a5cc9e0034b424e Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:00 +01:00 Committer: Borislav Petkov CommitterDate: Wed, 31 Mar 2021 16:20:22 +02:00 x86: Add insn_decode_kernel() Add a helper to decode kernel instructions; there's no point in endlessly repeating those last two arguments. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210326151259.379242...@infradead.org --- arch/x86/include/asm/insn.h| 2 ++ arch/x86/kernel/alternative.c | 2 +- arch/x86/kernel/cpu/mce/severity.c | 2 +- arch/x86/kernel/kprobes/core.c | 4 ++-- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/traps.c| 2 +- tools/arch/x86/include/asm/insn.h | 2 ++ 7 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index f03b6ca..05a6ab9 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -150,6 +150,8 @@ enum insn_mode { extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) + /* Attribute will be determined after getting ModRM (for opcode groups) */ static inline void insn_get_attribute(struct insn *insn) { diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ce28c5c..ff359b3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1280,7 +1280,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr, if (!emulate) emulate = opcode; - ret = insn_decode(, emulate, MAX_INSN_SIZE, INSN_MODE_KERN); + ret = insn_decode_kernel(, emulate); BUG_ON(ret < 0); BUG_ON(len != insn.length); diff --git a/arch/x86/kernel/cpu/mce/severity.c 
b/arch/x86/kernel/cpu/mce/severity.c index a2136ce..abdd2e4 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -225,7 +225,7 @@ static bool is_copy_from_user(struct pt_regs *regs) if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) return false; - ret = insn_decode(, insn_buf, MAX_INSN_SIZE, INSN_MODE_KERN); + ret = insn_decode_kernel(, insn_buf); if (ret < 0) return false; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index dd09021..1319ff4 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -285,7 +285,7 @@ static int can_probe(unsigned long paddr) if (!__addr) return 0; - ret = insn_decode(, (void *)__addr, MAX_INSN_SIZE, INSN_MODE_KERN); + ret = insn_decode_kernel(, (void *)__addr); if (ret < 0) return 0; @@ -322,7 +322,7 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) MAX_INSN_SIZE)) return 0; - ret = insn_decode(insn, dest, MAX_INSN_SIZE, INSN_MODE_KERN); + ret = insn_decode_kernel(insn, dest); if (ret < 0) return 0; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4299fc8..71425eb 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -324,7 +324,7 @@ static int can_optimize(unsigned long paddr) if (!recovered_insn) return 0; - ret = insn_decode(, (void *)recovered_insn, MAX_INSN_SIZE, INSN_MODE_KERN); + ret = insn_decode_kernel(, (void *)recovered_insn); if (ret < 0) return 0; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a5d2540..034f27f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -504,7 +504,7 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, MAX_INSN_SIZE)) return GP_NO_HINT; - ret = insn_decode(, insn_buf, MAX_INSN_SIZE, INSN_MODE_KERN); + ret = insn_decode_kernel(, insn_buf); if (ret < 0) return GP_NO_HINT; diff --git a/tools/arch/x86/include/asm/insn.h 
b/tools/arch/x86/include/asm/insn.h index c9f3eee..dc632b4 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -150,6 +150,8 @@ enum insn_mode { extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m); +#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN) + /* Attribute will be determined after getting ModRM (for opcode
[tip: x86/core] x86/alternatives: Optimize optimize_nops()
The following commit has been merged into the x86/core branch of tip: Commit-ID: b4da5166b084f3fac01d68e0e67cbf3bf78a3e12 Gitweb: https://git.kernel.org/tip/b4da5166b084f3fac01d68e0e67cbf3bf78a3e12 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:01 +01:00 Committer: Borislav Petkov CommitterDate: Wed, 31 Mar 2021 20:30:04 +02:00 x86/alternatives: Optimize optimize_nops() Currently, optimize_nops() scans to see if the alternative starts with NOPs. However, the emit pattern is: 141: \oldinstr 142: .skip (len-(142b-141b)), 0x90 That is, when oldinstr is short, the tail is padded with NOPs. This case never gets optimized. Rewrite optimize_nops() to replace any trailing string of NOPs inside the alternative to larger NOPs. Also run it irrespective of patching, replacing NOPs in both the original and replaced code. A direct consequence is that padlen becomes superfluous, so remove it. [ bp: - Adjust commit message - remove a stale comment about needing to pad - add a comment in optimize_nops() - exit early if the NOP verif. 
loop catches a mismatch - function should not add NOPs in that case - fix the "optimized NOPs" offsets output ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210326151259.442992...@infradead.org --- arch/x86/include/asm/alternative.h| 17 +- arch/x86/kernel/alternative.c | 49 +++--- tools/objtool/arch/x86/include/arch/special.h | 2 +- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 17b3609..a3c2315 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -65,7 +65,6 @@ struct alt_instr { u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction */ - u8 padlen; /* length of build-time padding */ } __packed; /* @@ -104,7 +103,6 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alt_end_marker "663" #define alt_slen "662b-661b" -#define alt_pad_lenalt_end_marker"b-662b" #define alt_total_slen alt_end_marker"b-661b" #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" @@ -151,8 +149,7 @@ static inline int alternatives_text_reserved(void *start, void *end) " .long " b_replacement(num)"f - .\n" /* new instruction */ \ " .word " __stringify(feature) "\n" /* feature bit */ \ " .byte " alt_total_slen "\n" /* source len */ \ - " .byte " alt_rlen(num) "\n"/* replacement len */ \ - " .byte " alt_pad_len "\n" /* pad len */ + " .byte " alt_rlen(num) "\n"/* replacement len */ #define ALTINSTR_REPLACEMENT(newinstr, num)/* replacement */ \ "# ALT: replacement " #num "\n" \ @@ -224,9 +221,6 @@ static inline int alternatives_text_reserved(void *start, void *end) * Peculiarities: * No memory clobber here. * Argument numbers start with 1. - * Best is to use constraints that are fixed size (like (%1) ...
"r") - * If you use variable sized constraints like "m" or "g" in the - * replacement make sure to pad to the worst case length. * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ @@ -315,13 +309,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * enough information for the alternatives patching code to patch an * instruction. See apply_alternatives(). */ -.macro altinstruction_entry orig alt feature orig_len alt_len pad_len +.macro altinstruction_entry orig alt feature orig_len alt_len .long \orig - . .long \alt - . .word \feature .byte \orig_len .byte \alt_len - .byte \pad_len .endm /* @@ -338,7 +331,7 @@ static inline int alternatives_text_reserved(void *start, void *end) 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b + altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f .popsection .pushsection .altinstr_replacement,"ax" @@ -375,8 +368,8 @@ static inline int alternatives_text_reserved(void *start, void *end) 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f + altinstruction_entry
[tip: x86/core] objtool: Correctly handle retpoline thunk calls
The following commit has been merged into the x86/core branch of tip: Commit-ID: db9d1dd670d7f3f146c654f289f20968af6a12de Gitweb: https://git.kernel.org/tip/db9d1dd670d7f3f146c654f289f20968af6a12de Author: Peter Zijlstra AuthorDate: Fri, 26 Mar 2021 16:12:03 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 11:34:01 +02:00 objtool: Correctly handle retpoline thunk calls Just like JMP handling, convert a direct CALL to a retpoline thunk into a retpoline-safe indirect CALL. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.567568...@infradead.org --- tools/objtool/check.c | 12 1 file changed, 12 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d45f018..519af4b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1025,6 +1025,18 @@ static int add_call_destinations(struct objtool_file *file) dest_off); return -1; } + + } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { + /* +* Retpoline calls are really dynamic calls in +* disguise, so convert them accordingly. +*/ + insn->type = INSN_CALL_DYNAMIC; + insn->retpoline_safe = true; + + remove_insn_ops(insn); + continue; + } else insn->call_dest = reloc->sym;
[tip: x86/core] objtool: Extract elf_symbol_add()
The following commit has been merged into the x86/core branch of tip: Commit-ID: d56a3568827ec4b8efcbcfc46fdc944995b6dcf1 Gitweb: https://git.kernel.org/tip/d56a3568827ec4b8efcbcfc46fdc944995b6dcf1 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:10 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 13:08:52 +02:00 objtool: Extract elf_symbol_add() Create a common helper to add symbols. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.003468...@infradead.org --- tools/objtool/elf.c | 56 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c278a04..8457218 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -290,12 +290,39 @@ static int read_sections(struct elf *elf) return 0; } +static void elf_add_symbol(struct elf *elf, struct symbol *sym) +{ + struct list_head *entry; + struct rb_node *pnode; + + sym->type = GELF_ST_TYPE(sym->sym.st_info); + sym->bind = GELF_ST_BIND(sym->sym.st_info); + + sym->offset = sym->sym.st_value; + sym->len = sym->sym.st_size; + + rb_add(>node, >sec->symbol_tree, symbol_to_offset); + pnode = rb_prev(>node); + if (pnode) + entry = _entry(pnode, struct symbol, node)->list; + else + entry = >sec->symbol_list; + list_add(>list, entry); + elf_hash_add(elf->symbol_hash, >hash, sym->idx); + elf_hash_add(elf->symbol_name_hash, >name_hash, str_hash(sym->name)); + + /* +* Don't store empty STT_NOTYPE symbols in the rbtree. They +* can exist within a function, confusing the sorting. 
+*/ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); +} + static int read_symbols(struct elf *elf) { struct section *symtab, *symtab_shndx, *sec; struct symbol *sym, *pfunc; - struct list_head *entry; - struct rb_node *pnode; int symbols_nr, i; char *coldstr; Elf_Data *shndx_data = NULL; @@ -340,9 +367,6 @@ static int read_symbols(struct elf *elf) goto err; } - sym->type = GELF_ST_TYPE(sym->sym.st_info); - sym->bind = GELF_ST_BIND(sym->sym.st_info); - if ((sym->sym.st_shndx > SHN_UNDEF && sym->sym.st_shndx < SHN_LORESERVE) || (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) { @@ -355,32 +379,14 @@ static int read_symbols(struct elf *elf) sym->name); goto err; } - if (sym->type == STT_SECTION) { + if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) { sym->name = sym->sec->name; sym->sec->sym = sym; } } else sym->sec = find_section_by_index(elf, 0); - sym->offset = sym->sym.st_value; - sym->len = sym->sym.st_size; - - rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset); - pnode = rb_prev(&sym->node); - if (pnode) - entry = &rb_entry(pnode, struct symbol, node)->list; - else - entry = &sym->sec->symbol_list; - list_add(&sym->list, entry); - elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); - elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); - - /* -* Don't store empty STT_NOTYPE symbols in the rbtree. They -* can exist within a function, confusing the sorting. -*/ - if (!sym->len) - rb_erase(&sym->node, &sym->sec->symbol_tree); + elf_add_symbol(elf, sym); } if (stats)
[tip: x86/core] objtool/x86: Rewrite retpoline thunk calls
The following commit has been merged into the x86/core branch of tip: Commit-ID: f31390437ce984118215169d75570e365457ec23 Gitweb: https://git.kernel.org/tip/f31390437ce984118215169d75570e365457ec23 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:15 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 14:30:45 +02:00 objtool/x86: Rewrite retpoline thunk calls When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an indirect call, have objtool rewrite it to: ALTERNATIVE "call __x86_indirect_thunk_\reg", "call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE) Additionally, in order to not emit endless identical .altinst_replacement chunks, use a global symbol for them, see __x86_indirect_alt_*. This also avoids objtool from having to do code generation. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.320177...@infradead.org --- arch/x86/include/asm/asm-prototypes.h | 12 ++- arch/x86/lib/retpoline.S | 41 - tools/objtool/arch/x86/decode.c | 117 +- 3 files changed, 167 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 0545b07..4cb726c 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void); #ifdef CONFIG_RETPOLINE -#define DECL_INDIRECT_THUNK(reg) \ +#undef GEN +#define GEN(reg) \ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); +#include + +#undef GEN +#define GEN(reg) \ + extern asmlinkage void __x86_indirect_alt_call_ ## reg (void); +#include #undef GEN -#define GEN(reg) DECL_INDIRECT_THUNK(reg) +#define GEN(reg) \ + extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void); #include #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index d2c0d14..4d32cb0 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -10,6 
+10,8 @@ #include #include + .section .text.__x86.indirect_thunk + .macro RETPOLINE reg ANNOTATE_INTRA_FUNCTION_CALL call.Ldo_rop_\@ @@ -25,9 +27,9 @@ .endm .macro THUNK reg - .section .text.__x86.indirect_thunk .align 32 + SYM_FUNC_START(__x86_indirect_thunk_\reg) ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ @@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) .endm /* + * This generates .altinstr_replacement symbols for use by objtool. They, + * however, must not actually live in .altinstr_replacement since that will be + * discarded after init, but module alternatives will also reference these + * symbols. + * + * Their names matches the "__x86_indirect_" prefix to mark them as retpolines. + */ +.macro ALT_THUNK reg + + .align 1 + +SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg) + ANNOTATE_RETPOLINE_SAFE +1: call*%\reg +2: .skip 5-(2b-1b), 0x90 +SYM_FUNC_END(__x86_indirect_alt_call_\reg) + +SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg) + ANNOTATE_RETPOLINE_SAFE +1: jmp *%\reg +2: .skip 5-(2b-1b), 0x90 +SYM_FUNC_END(__x86_indirect_alt_jmp_\reg) + +.endm + +/* * Despite being an assembler file we can't just use .irp here * because __KSYM_DEPS__ only uses the C preprocessor and would * only see one instance of "__x86_indirect_thunk_\reg" rather @@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) #define GEN(reg) EXPORT_THUNK(reg) #include +#undef GEN +#define GEN(reg) ALT_THUNK reg +#include + +#undef GEN +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg) +#include + +#undef GEN +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg) +#include diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index e5fa3a5..44375fa 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -16,6 +16,7 @@ #include #include #include +#include static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, @@ -610,6 +611,122 @@ const char *arch_nop_insn(int len) 
return nops[len-1]; } +/* asm/alternative.h ? */ + +#define ALTINSTR_FLAG_INV (1 << 15) +#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) + +struct alt_instr { + s32 instr_offset; /* original instruction */ + s32 repl_offset;/* offset to replacement instruction */ + u16 cpuid; /* cpuid bit set for replacement */ + u8 instrlen; /* length of original instruction */ + u8 replacementlen; /* length of new instruction */ +} __packed; + +static int elf_add_alternative(struct elf *elf, + struct instruction *orig, struct symbol *sym, + int
[tip: x86/core] objtool: Extract elf_strtab_concat()
The following commit has been merged into the x86/core branch of tip: Commit-ID: 557c25be3588971caf21364b6fd240769e37c47c Gitweb: https://git.kernel.org/tip/557c25be3588971caf21364b6fd240769e37c47c Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:09 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 13:05:50 +02:00 objtool: Extract elf_strtab_concat() Create a common helper to append strings to a strtab. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.941474...@infradead.org --- tools/objtool/elf.c | 60 +++- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 7b65ae3..c278a04 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -673,13 +673,48 @@ err: return NULL; } +static int elf_add_string(struct elf *elf, struct section *strtab, char *str) +{ + Elf_Data *data; + Elf_Scn *s; + int len; + + if (!strtab) + strtab = find_section_by_name(elf, ".strtab"); + if (!strtab) { + WARN("can't find .strtab section"); + return -1; + } + + s = elf_getscn(elf->elf, strtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return -1; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return -1; + } + + data->d_buf = str; + data->d_size = strlen(str) + 1; + data->d_align = 1; + + len = strtab->len; + strtab->len += data->d_size; + strtab->changed = true; + + return len; +} + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { struct section *sec, *shstrtab; size_t size = entsize * nr; Elf_Scn *s; - Elf_Data *data; sec = malloc(sizeof(*sec)); if (!sec) { @@ -736,7 +771,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, sec->sh.sh_addralign = 1; sec->sh.sh_flags = SHF_ALLOC | sh_flags; - /* Add section name to .shstrtab (or .strtab for Clang) */ shstrtab = find_section_by_name(elf, 
".shstrtab"); if (!shstrtab) @@ -745,27 +779,9 @@ struct section *elf_create_section(struct elf *elf, const char *name, WARN("can't find .shstrtab or .strtab section"); return NULL; } - - s = elf_getscn(elf->elf, shstrtab->idx); - if (!s) { - WARN_ELF("elf_getscn"); + sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name); + if (sec->sh.sh_name == -1) return NULL; - } - - data = elf_newdata(s); - if (!data) { - WARN_ELF("elf_newdata"); - return NULL; - } - - data->d_buf = sec->name; - data->d_size = strlen(name) + 1; - data->d_align = 1; - - sec->sh.sh_name = shstrtab->len; - - shstrtab->len += strlen(name) + 1; - shstrtab->changed = true; list_add_tail(&sec->list, &elf->sections); elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
[tip: x86/core] objtool: Handle per arch retpoline naming
The following commit has been merged into the x86/core branch of tip: Commit-ID: 3b652980a250c1ed9e0c361750f029781831cdc3 Gitweb: https://git.kernel.org/tip/3b652980a250c1ed9e0c361750f029781831cdc3 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:04 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 11:36:52 +02:00 objtool: Handle per arch retpoline naming The __x86_indirect_ naming is obviously not generic. Shorten to allow matching some additional magic names later. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.630296...@infradead.org --- tools/objtool/arch/x86/decode.c | 5 + tools/objtool/check.c| 9 +++-- tools/objtool/include/objtool/arch.h | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 8380d0b..e5fa3a5 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -645,3 +645,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg) return 0; } + +bool arch_is_retpoline(struct symbol *sym) +{ + return !strncmp(sym->name, "__x86_indirect_", 15); +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 519af4b..6fbc001 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -850,6 +850,11 @@ static int add_ignore_alternatives(struct objtool_file *file) return 0; } +__weak bool arch_is_retpoline(struct symbol *sym) +{ + return false; +} + /* * Find the destination instructions for all jumps. */ @@ -872,7 +877,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { + } else if (arch_is_retpoline(reloc->sym)) { /* * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. 
@@ -1026,7 +1031,7 @@ static int add_call_destinations(struct objtool_file *file) return -1; } - } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) { + } else if (arch_is_retpoline(reloc->sym)) { /* * Retpoline calls are really dynamic calls in * disguise, so convert them accordingly. diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 6ff0685..bb30993 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -86,4 +86,6 @@ const char *arch_nop_insn(int len); int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); +bool arch_is_retpoline(struct symbol *sym); + #endif /* _ARCH_H */
[tip: x86/core] objtool: Add elf_create_reloc() helper
The following commit has been merged into the x86/core branch of tip: Commit-ID: 7508e2958a82675e75e34221c26ad4242d4ef283 Gitweb: https://git.kernel.org/tip/7508e2958a82675e75e34221c26ad4242d4ef283 Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:07 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 12:55:55 +02:00 objtool: Add elf_create_reloc() helper We have 4 instances of adding a relocation. Create a common helper to avoid growing even more. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.817438...@infradead.org --- tools/objtool/check.c | 78 + tools/objtool/elf.c | 86 ++-- tools/objtool/include/objtool/elf.h | 10 ++- tools/objtool/orc_gen.c | 30 ++ 4 files changed, 85 insertions(+), 119 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 1d0415b..61fe29a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -433,8 +433,7 @@ reachable: static int create_static_call_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; struct static_call_site *site; struct instruction *insn; struct symbol *key_sym; @@ -460,8 +459,7 @@ static int create_static_call_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) + if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) return -1; idx = 0; @@ -471,25 +469,11 @@ static int create_static_call_sections(struct objtool_file *file) memset(site, 0, sizeof(struct static_call_site)); /* populate reloc for 'addr' */ - reloc = malloc(sizeof(*reloc)); - - if (!reloc) { - perror("malloc"); - return -1; - } - memset(reloc, 0, sizeof(*reloc)); - - insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc); - if (!reloc->sym) { - WARN_FUNC("static call tramp: missing containing symbol", - insn->sec, insn->offset); + if 
(elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(struct static_call_site), + R_X86_64_PC32, + insn->sec, insn->offset)) return -1; - } - - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site); - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); /* find key symbol */ key_name = strdup(insn->call_dest->name); @@ -526,18 +510,11 @@ static int create_static_call_sections(struct objtool_file *file) free(key_name); /* populate reloc for 'key' */ - reloc = malloc(sizeof(*reloc)); - if (!reloc) { - perror("malloc"); + if (elf_add_reloc(file->elf, sec, + idx * sizeof(struct static_call_site) + 4, + R_X86_64_PC32, key_sym, + is_sibling_call(insn) * STATIC_CALL_SITE_TAIL)) return -1; - } - memset(reloc, 0, sizeof(*reloc)); - reloc->sym = key_sym; - reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0; - reloc->type = R_X86_64_PC32; - reloc->offset = idx * sizeof(struct static_call_site) + 4; - reloc->sec = reloc_sec; - elf_add_reloc(file->elf, reloc); idx++; } @@ -547,8 +524,7 @@ static int create_static_call_sections(struct objtool_file *file) static int create_mcount_loc_sections(struct objtool_file *file) { - struct section *sec, *reloc_sec; - struct reloc *reloc; + struct section *sec; unsigned long *loc; struct instruction *insn; int idx; @@ -571,8 +547,7 @@ static int create_mcount_loc_sections(struct objtool_file *file) if (!sec) return -1; - reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!reloc_sec) + if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) return -1; idx = 0; @@ -581,32 +556,11 @@ static int create_mcount_loc_sections(struct objtool_file *file) loc = (unsigned long *)sec->data->d_buf + idx; memset(loc, 0, sizeof(unsigned long)); -
[tip: x86/core] objtool: Implicitly create reloc sections
The following commit has been merged into the x86/core branch of tip: Commit-ID: aef0f13e96db08f31be6b96d28e761df46d86ff4 Gitweb: https://git.kernel.org/tip/aef0f13e96db08f31be6b96d28e761df46d86ff4 Author: Peter Zijlstra AuthorDate: Fri, 26 Mar 2021 16:12:08 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 13:01:15 +02:00 objtool: Implicitly create reloc sections Have elf_add_reloc() create the relocation section implicitly. Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151259.880174...@infradead.org --- tools/objtool/check.c | 6 -- tools/objtool/elf.c | 9 - tools/objtool/include/objtool/elf.h | 1 - tools/objtool/orc_gen.c | 2 -- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 61fe29a..600fa67 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -459,9 +459,6 @@ static int create_static_call_sections(struct objtool_file *file) if (!sec) return -1; - if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) - return -1; - idx = 0; list_for_each_entry(insn, &file->static_call_list, static_call_node) { @@ -547,9 +544,6 @@ static int create_mcount_loc_sections(struct objtool_file *file) if (!sec) return -1; - if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) - return -1; - idx = 0; list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0ab52ac..7b65ae3 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -447,11 +447,18 @@ err: return -1; } +static struct section *elf_create_reloc_section(struct elf *elf, + struct section *base, + int reltype); + int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct symbol *sym, int addend) { struct reloc *reloc; + if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA)) + return -1; +
reloc = malloc(sizeof(*reloc)); if (!reloc) { perror("malloc"); @@ -829,7 +836,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec return sec; } -struct section *elf_create_reloc_section(struct elf *elf, +static struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 825ad32..463f329 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -122,7 +122,6 @@ static inline u32 reloc_hash(struct reloc *reloc) struct elf *elf_open_read(const char *name, int flags); struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); -struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, unsigned int type, struct symbol *sym, int addend); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 1b57be6..dc9b7dd 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -225,8 +225,6 @@ int orc_create(struct objtool_file *file) sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); if (!sec) return -1; - if (!elf_create_reloc_section(file->elf, sec, SHT_RELA)) - return -1; /* Write ORC entries to sections: */ list_for_each_entry(entry, &orc_list, list) {
[tip: x86/core] objtool: Skip magical retpoline .altinstr_replacement
The following commit has been merged into the x86/core branch of tip: Commit-ID: 68a59124f4c6363de619fea63231a97dd220a12c Gitweb: https://git.kernel.org/tip/68a59124f4c6363de619fea63231a97dd220a12c Author: Peter Zijlstra AuthorDate: Fri, 26 Mar 2021 16:12:14 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 13:29:40 +02:00 objtool: Skip magical retpoline .altinstr_replacement When the .altinstr_replacement is a retpoline, skip the alternative. We already special case retpolines anyway. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.259429...@infradead.org --- tools/objtool/special.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 2c7fbda..07b21cf 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, return -1; } + /* +* Skip retpoline .altinstr_replacement... we already rewrite the +* instructions for retpolines anyway, see arch_is_retpoline() +* usage in add_{call,jump}_destinations(). +*/ + if (arch_is_retpoline(new_reloc->sym)) + return 1; + alt->new_sec = new_reloc->sym->sec; alt->new_off = (unsigned int)new_reloc->addend; @@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, struct list_head *alts) memset(alt, 0, sizeof(*alt)); ret = get_alt_entry(elf, entry, sec, idx, alt); - if (ret) + if (ret > 0) + continue; + if (ret < 0) return ret; list_add_tail(&alt->list, alts);
[tip: x86/core] objtool: Add elf_create_undef_symbol()
The following commit has been merged into the x86/core branch of tip: Commit-ID: 993b477acdb652c6134e5faae05e8a378911cbb3 Gitweb: https://git.kernel.org/tip/993b477acdb652c6134e5faae05e8a378911cbb3 Author: Peter Zijlstra AuthorDate: Fri, 26 Mar 2021 16:12:11 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 13:12:48 +02:00 objtool: Add elf_create_undef_symbol() Allow objtool to create undefined symbols; this allows creating relocations to symbols not currently in the symbol table. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.064743...@infradead.org --- tools/objtool/elf.c | 60 - tools/objtool/include/objtool/elf.h | 1 +- 2 files changed, 61 insertions(+) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 8457218..d08f5f3 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -715,6 +715,66 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) return len; } +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) +{ + struct section *symtab; + struct symbol *sym; + Elf_Data *data; + Elf_Scn *s; + + sym = malloc(sizeof(*sym)); + if (!sym) { + perror("malloc"); + return NULL; + } + memset(sym, 0, sizeof(*sym)); + + sym->name = strdup(name); + + sym->sym.st_name = elf_add_string(elf, NULL, sym->name); + if (sym->sym.st_name == -1) + return NULL; + + sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE); + // st_other 0 + // st_shndx 0 + // st_value 0 + // st_size 0 + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + WARN("can't find .symtab"); + return NULL; + } + + s = elf_getscn(elf->elf, symtab->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return NULL; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return NULL; + } + + data->d_buf = &sym->sym; + data->d_size = sizeof(sym->sym); + data->d_align = 1; + + sym->idx = symtab->len / sizeof(sym->sym); + +
symtab->len += data->d_size; + symtab->changed = true; + + sym->sec = find_section_by_index(elf, 0); + + elf_add_symbol(elf, sym); + + return sym; +} + struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr) { diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 463f329..45e5ede 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -133,6 +133,7 @@ int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset, unsigned int len, const char *insn); int elf_write_reloc(struct elf *elf, struct reloc *reloc); +struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name); int elf_write(struct elf *elf); void elf_close(struct elf *elf);
[tip: x86/core] objtool: Keep track of retpoline call sites
The following commit has been merged into the x86/core branch of tip: Commit-ID: 7e57a6bc5a22145429d3a232619b0637c312397a Gitweb: https://git.kernel.org/tip/7e57a6bc5a22145429d3a232619b0637c312397a Author: Peter Zijlstra AuthorDate: Fri, 26 Mar 2021 16:12:12 +01:00 Committer: Borislav Petkov CommitterDate: Thu, 01 Apr 2021 13:20:21 +02:00 objtool: Keep track of retpoline call sites Provide infrastructure for architectures to rewrite/augment compiler generated retpoline calls. Similar to what we do for static_call()s, keep track of the instructions that are retpoline calls. Use the same list_head, since a retpoline call cannot also be a static_call. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Miroslav Benes Link: https://lkml.kernel.org/r/20210326151300.130805...@infradead.org --- tools/objtool/check.c | 34 tools/objtool/include/objtool/arch.h| 2 +- tools/objtool/include/objtool/check.h | 2 +- tools/objtool/include/objtool/objtool.h | 1 +- tools/objtool/objtool.c | 1 +- 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 600fa67..77074db 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -451,7 +451,7 @@ static int create_static_call_sections(struct objtool_file *file) return 0; idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) + list_for_each_entry(insn, &file->static_call_list, call_node) idx++; sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE, @@ -460,7 +460,7 @@ static int create_static_call_sections(struct objtool_file *file) return -1; idx = 0; - list_for_each_entry(insn, &file->static_call_list, static_call_node) { + list_for_each_entry(insn, &file->static_call_list, call_node) { site = (struct static_call_site *)sec->data->d_buf + idx; memset(site, 0, sizeof(struct static_call_site)); @@ -829,13 +829,16 @@ static int add_jump_destinations(struct objtool_file *file) else insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL; +
list_add_tail(&insn->call_node, + &file->retpoline_call_list); + insn->retpoline_safe = true; continue; } else if (insn->func) { /* internal or external sibling call (with reloc) */ insn->call_dest = reloc->sym; if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, + list_add_tail(&insn->call_node, &file->static_call_list); } continue; @@ -897,7 +900,7 @@ static int add_jump_destinations(struct objtool_file *file) /* internal sibling call (without reloc) */ insn->call_dest = insn->jump_dest->func; if (insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, + list_add_tail(&insn->call_node, &file->static_call_list); } } @@ -981,6 +984,9 @@ static int add_call_destinations(struct objtool_file *file) insn->type = INSN_CALL_DYNAMIC; insn->retpoline_safe = true; + list_add_tail(&insn->call_node, + &file->retpoline_call_list); + remove_insn_ops(insn); continue; @@ -988,7 +994,7 @@ static int add_call_destinations(struct objtool_file *file) insn->call_dest = reloc->sym; if (insn->call_dest && insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, + list_add_tail(&insn->call_node, &file->static_call_list); } @@ -1714,6 +1720,11 @@ static void mark_rodata(struct objtool_file *file) file->rodata = found; } +__weak int arch_rewrite_retpolines(struct objtool_file *file) +{ + return 0; +} + static int decode_sections(struct objtool_file *file) { int ret; @@ -1742,6 +1753,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* +* Must be before add_special_section_alts() as that depends on +* jump_dest being set. +*/ ret = add_jump_destinations(file); if (ret) return ret; @@ -1778,6 +1793,15 @@ static int
[tip: x86/core] x86/retpoline: Simplify retpolines
The following commit has been merged into the x86/core branch of tip: Commit-ID: 2077915516ebb06d36e03cb542ccb833a8b0a3eb Gitweb: https://git.kernel.org/tip/2077915516ebb06d36e03cb542ccb833a8b0a3eb Author:Peter Zijlstra AuthorDate:Fri, 26 Mar 2021 16:12:02 +01:00 Committer: Borislav Petkov CommitterDate: Wed, 31 Mar 2021 22:31:57 +02:00 x86/retpoline: Simplify retpolines Due to c9c324dc22aa ("objtool: Support stack layout changes in alternatives") it is possible to simplify the retpolines. Currently our retpolines consist of 2 symbols: - __x86_indirect_thunk_\reg: the compiler target - __x86_retpoline_\reg: the actual retpoline. Both are consecutive in code and aligned such that for any one register they both live in the same cacheline: <__x86_indirect_thunk_rax>: 0: ff e0 jmpq *%rax 2: 90 nop 3: 90 nop 4: 90 nop 0005 <__x86_retpoline_rax>: 5: e8 07 00 00 00 callq 11 <__x86_retpoline_rax+0xc> a: f3 90 pause c: 0f ae e8lfence f: eb f9 jmpa <__x86_retpoline_rax+0x5> 11: 48 89 04 24 mov%rax,(%rsp) 15: c3 retq 16: 66 2e 0f 1f 84 00 00 00 00 00 nopw %cs:0x0(%rax,%rax,1) The thunk is an alternative_2, where one option is a jmp to the retpoline. This was done so that objtool didn't need to deal with alternatives with stack ops. But that problem has been solved, so now it is possible to fold the entire retpoline into the alternative to simplify and consolidate unused bytes: <__x86_indirect_thunk_rax>: 0: ff e0 jmpq *%rax 2: 90 nop 3: 90 nop 4: 90 nop 5: 90 nop 6: 90 nop 7: 90 nop 8: 90 nop 9: 90 nop a: 90 nop b: 90 nop c: 90 nop d: 90 nop e: 90 nop f: 90 nop 10: 90 nop 11: 66 66 2e 0f 1f 84 00 00 00 00 00data16 nopw %cs:0x0(%rax,%rax,1) 1c: 0f 1f 40 00 nopl 0x0(%rax) Notice that since the longest alternative sequence is now: 0: e8 07 00 00 00 callq c <.altinstr_replacement+0xc> 5: f3 90 pause 7: 0f ae e8lfence a: eb f9 jmp5 <.altinstr_replacement+0x5> c: 48 89 04 24 mov%rax,(%rsp) 10: c3 retq 17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. 
(IOW, if we can shrink the retpoline by 1 byte we can pack it more densely). [ bp: Massage commit message. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210326151259.506071...@infradead.org --- arch/x86/include/asm/asm-prototypes.h | 7 +- arch/x86/include/asm/nospec-branch.h | 6 ++--- arch/x86/lib/retpoline.S | 34 +- tools/objtool/check.c | 3 +-- 4 files changed, 21 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 51e2bf2..0545b07 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void); #define DECL_INDIRECT_THUNK(reg) \ extern asmlinkage void __x86_indirect_thunk_ ## reg (void); -#define DECL_RETPOLINE(reg) \ - extern asmlinkage void __x86_retpoline_ ## reg (void); - #undef GEN #define GEN(reg) DECL_INDIRECT_THUNK(reg) #include -#undef GEN -#define GEN(reg) DECL_RETPOLINE(reg) -#include - #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 529f8e9..664be73 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -80,7 +80,7 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ - __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD #else jmp *%\reg @@ -90,7 +90,7 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ - __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(call
[tip: locking/urgent] static_call: Align static_call_is_init() patching condition
The following commit has been merged into the locking/urgent branch of tip: Commit-ID: 698bacefe993ad2922c9d3b1380591ad489355e9 Gitweb: https://git.kernel.org/tip/698bacefe993ad2922c9d3b1380591ad489355e9 Author: Peter Zijlstra AuthorDate: Thu, 18 Mar 2021 11:29:56 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 19 Mar 2021 13:16:44 +01:00 static_call: Align static_call_is_init() patching condition The intent is to avoid writing init code after init (because the text might have been freed). The code is needlessly different between jump_label and static_call and not obviously correct. The existing code relies on the fact that the module loader clears the init layout, such that within_module_init() always fails, while jump_label relies on the module state which is more obvious and matches the kernel logic. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.636651...@infradead.org --- kernel/static_call.c | 14 -- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index 080c8a9..fc22590 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -149,6 +149,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) }; for (site_mod = &first; site_mod; site_mod = site_mod->next) { + bool init = system_state < SYSTEM_RUNNING; struct module *mod = site_mod->mod; if (!site_mod->sites) { @@ -168,6 +169,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) if (mod) { stop = mod->static_call_sites + mod->num_static_call_sites; + init = mod->state == MODULE_STATE_COMING; } #endif @@ -175,16 +177,8 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) site < stop && static_call_key(site) == key; site++) { void *site_addr = static_call_addr(site); - if (static_call_is_init(site)) { - /* -* Don't write to call sites which were in -* initmem and have since been
freed. -*/ - if (!mod && system_state >= SYSTEM_RUNNING) - continue; - if (mod && !within_module_init((unsigned long)site_addr, mod)) - continue; - } + if (!init && static_call_is_init(site)) + continue; if (!kernel_text_address((unsigned long)site_addr)) { WARN_ONCE(1, "can't patch static call site at %pS",
[tip: locking/urgent] static_call: Fix static_call_update() sanity check
The following commit has been merged into the locking/urgent branch of tip: Commit-ID: 38c93587375053c5b9ef093f4a5ea754538cba32 Gitweb: https://git.kernel.org/tip/38c93587375053c5b9ef093f4a5ea754538cba32 Author:Peter Zijlstra AuthorDate:Thu, 18 Mar 2021 11:31:51 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 19 Mar 2021 13:16:44 +01:00 static_call: Fix static_call_update() sanity check Sites that match init_section_contains() get marked as INIT. For built-in code init_sections contains both __init and __exit text. OTOH kernel_text_address() only explicitly includes __init text (and there are no __exit text markers). Match what jump_label already does and ignore the warning for INIT sites. Also see the excellent changelog for commit: 8f35eaa5f2de ("jump_label: Don't warn on __exit jump entries") Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure") Reported-by: Sumit Garg Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.739542...@infradead.org --- kernel/jump_label.c | 8 kernel/static_call.c | 11 ++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index c6a39d6..ba39fbb 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -407,6 +407,14 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init) return false; if (!kernel_text_address(jump_entry_code(entry))) { + /* +* This skips patching built-in __exit, which +* is part of init_section_contains() but is +* not part of kernel_text_address(). +* +* Skipping built-in __exit is fine since it +* will never be executed. 
+*/ WARN_ONCE(!jump_entry_is_init(entry), "can't patch jump_label at %pS", (void *)jump_entry_code(entry)); diff --git a/kernel/static_call.c b/kernel/static_call.c index fc22590..2c5950b 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -181,7 +181,16 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) continue; if (!kernel_text_address((unsigned long)site_addr)) { - WARN_ONCE(1, "can't patch static call site at %pS", + /* +* This skips patching built-in __exit, which +* is part of init_section_contains() but is +* not part of kernel_text_address(). +* +* Skipping built-in __exit is fine since it +* will never be executed. +*/ + WARN_ONCE(!static_call_is_init(site), + "can't patch static call site at %pS", site_addr); continue; }
[tip: locking/urgent] static_call: Fix static_call_set_init()
The following commit has been merged into the locking/urgent branch of tip: Commit-ID: 68b1eddd421d2b16c6655eceb48918a1e896bbbc Gitweb: https://git.kernel.org/tip/68b1eddd421d2b16c6655eceb48918a1e896bbbc Author:Peter Zijlstra AuthorDate:Thu, 18 Mar 2021 11:27:19 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 19 Mar 2021 13:16:44 +01:00 static_call: Fix static_call_set_init() It turns out that static_call_set_init() does not preserve the other flags; IOW. it clears TAIL if it was set. Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure") Reported-by: Sumit Garg Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jarkko Sakkinen Tested-by: Sumit Garg Link: https://lkml.kernel.org/r/20210318113610.519406...@infradead.org --- kernel/static_call.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index ae82529..080c8a9 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -35,27 +35,30 @@ static inline void *static_call_addr(struct static_call_site *site) return (void *)((long)site->addr + (long)>addr); } +static inline unsigned long __static_call_key(const struct static_call_site *site) +{ + return (long)site->key + (long)>key; +} static inline struct static_call_key *static_call_key(const struct static_call_site *site) { - return (struct static_call_key *) - (((long)site->key + (long)>key) & ~STATIC_CALL_SITE_FLAGS); + return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); } /* These assume the key is word-aligned. 
*/ static inline bool static_call_is_init(struct static_call_site *site) { - return ((long)site->key + (long)>key) & STATIC_CALL_SITE_INIT; + return __static_call_key(site) & STATIC_CALL_SITE_INIT; } static inline bool static_call_is_tail(struct static_call_site *site) { - return ((long)site->key + (long)>key) & STATIC_CALL_SITE_TAIL; + return __static_call_key(site) & STATIC_CALL_SITE_TAIL; } static inline void static_call_set_init(struct static_call_site *site) { - site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) - + site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - (long)>key; } @@ -190,7 +193,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) } arch_static_call_transform(site_addr, NULL, func, - static_call_is_tail(site)); + static_call_is_tail(site)); } } @@ -349,7 +352,7 @@ static int static_call_add_module(struct module *mod) struct static_call_site *site; for (site = start; site != stop; site++) { - unsigned long s_key = (long)site->key + (long)>key; + unsigned long s_key = __static_call_key(site); unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; unsigned long key;
[tip: irq/core] tasklets: Replace spin wait in tasklet_unlock_wait()
The following commit has been merged into the irq/core branch of tip: Commit-ID: da044747401fc16202e223c9da970ed4e84fd84d Gitweb: https://git.kernel.org/tip/da044747401fc16202e223c9da970ed4e84fd84d Author:Peter Zijlstra AuthorDate:Tue, 09 Mar 2021 09:42:08 +01:00 Committer: Thomas Gleixner CommitterDate: Wed, 17 Mar 2021 16:33:55 +01:00 tasklets: Replace spin wait in tasklet_unlock_wait() tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This is wasting CPU cycles in a tight loop which is especially painful in a guest when the CPU running the tasklet is scheduled out. tasklet_unlock_wait() is invoked from tasklet_kill() which is used in teardown paths and not performance critical at all. Replace the spin wait with wait_var_event(). There are no users of tasklet_unlock_wait() which are invoked from atomic contexts. The usage in tasklet_disable() has been replaced temporarily with the spin waiting variant until the atomic users are fixed up and will be converted to the sleep wait variant later. 
Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.783936...@linutronix.de --- include/linux/interrupt.h | 13 ++--- kernel/softirq.c | 18 ++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b7f0012..b50be4f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -664,17 +664,8 @@ static inline int tasklet_trylock(struct tasklet_struct *t) return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); } -static inline void tasklet_unlock(struct tasklet_struct *t) -{ - smp_mb__before_atomic(); - clear_bit(TASKLET_STATE_RUN, &(t)->state); -} - -static inline void tasklet_unlock_wait(struct tasklet_struct *t) -{ - while (test_bit(TASKLET_STATE_RUN, &t->state)) - cpu_relax(); -} +void tasklet_unlock(struct tasklet_struct *t); +void tasklet_unlock_wait(struct tasklet_struct *t); /* * Do not use in new code. Waiting for tasklets from atomic contexts is diff --git a/kernel/softirq.c b/kernel/softirq.c index 8d56bbf..ef6429a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -632,6 +633,23 @@ void tasklet_kill(struct tasklet_struct *t) } EXPORT_SYMBOL(tasklet_kill); +#ifdef CONFIG_SMP +void tasklet_unlock(struct tasklet_struct *t) +{ + smp_mb__before_atomic(); + clear_bit(TASKLET_STATE_RUN, &t->state); + smp_mb__after_atomic(); + wake_up_var(&t->state); +} +EXPORT_SYMBOL_GPL(tasklet_unlock); + +void tasklet_unlock_wait(struct tasklet_struct *t) +{ + wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state)); +} +EXPORT_SYMBOL_GPL(tasklet_unlock_wait); +#endif + void __init softirq_init(void) { int cpu;
[tip: irq/core] tasklets: Replace spin wait in tasklet_kill()
The following commit has been merged into the irq/core branch of tip: Commit-ID: 697d8c63c4a2991a22a896a5e6adcdbb28fefe56 Gitweb: https://git.kernel.org/tip/697d8c63c4a2991a22a896a5e6adcdbb28fefe56 Author:Peter Zijlstra AuthorDate:Tue, 09 Mar 2021 09:42:09 +01:00 Committer: Thomas Gleixner CommitterDate: Wed, 17 Mar 2021 16:33:57 +01:00 tasklets: Replace spin wait in tasklet_kill() tasklet_kill() spin waits for TASKLET_STATE_SCHED to be cleared invoking yield() from inside the loop. yield() is an ill defined mechanism and the result might still be wasting CPU cycles in a tight loop which is especially painful in a guest when the CPU running the tasklet is scheduled out. tasklet_kill() is used in teardown paths and not performance critical at all. Replace the spin wait with wait_var_event(). Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210309084241.890532...@linutronix.de --- kernel/softirq.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index ef6429a..ba89ca7 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -532,10 +532,12 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) } EXPORT_SYMBOL(__tasklet_hi_schedule); -static bool tasklet_should_run(struct tasklet_struct *t) +static bool tasklet_clear_sched(struct tasklet_struct *t) { - if (test_and_clear_bit(TASKLET_STATE_SCHED, >state)) + if (test_and_clear_bit(TASKLET_STATE_SCHED, >state)) { + wake_up_var(>state); return true; + } WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n", t->use_callback ? 
"callback" : "func", @@ -563,7 +565,7 @@ static void tasklet_action_common(struct softirq_action *a, if (tasklet_trylock(t)) { if (!atomic_read(>count)) { - if (tasklet_should_run(t)) { + if (tasklet_clear_sched(t)) { if (t->use_callback) t->callback(t); else @@ -623,13 +625,11 @@ void tasklet_kill(struct tasklet_struct *t) if (in_interrupt()) pr_notice("Attempt to kill tasklet from interrupt\n"); - while (test_and_set_bit(TASKLET_STATE_SCHED, >state)) { - do { - yield(); - } while (test_bit(TASKLET_STATE_SCHED, >state)); - } + while (test_and_set_bit(TASKLET_STATE_SCHED, >state)) + wait_var_event(>state, !test_bit(TASKLET_STATE_SCHED, >state)); + tasklet_unlock_wait(t); - clear_bit(TASKLET_STATE_SCHED, >state); + tasklet_clear_sched(t); } EXPORT_SYMBOL(tasklet_kill);
[tip: x86/cpu] x86: Remove dynamic NOP selection
The following commit has been merged into the x86/cpu branch of tip: Commit-ID: a89dfde3dc3c2dbf56910af75e2d8b11ec5308f6 Gitweb: https://git.kernel.org/tip/a89dfde3dc3c2dbf56910af75e2d8b11ec5308f6 Author:Peter Zijlstra AuthorDate:Fri, 12 Mar 2021 12:32:54 +01:00 Committer: Borislav Petkov CommitterDate: Mon, 15 Mar 2021 16:24:59 +01:00 x86: Remove dynamic NOP selection This ensures that a NOP is a NOP and not a random other instruction that is also a NOP. It allows simplification of dynamic code patching that wants to verify existing code before writing new instructions (ftrace, jump_label, static_call, etc..). Differentiating on NOPs is not a feature. This pessimises 32bit (DONTCARE) and 32bit on 64bit CPUs (CARELESS). 32bit is not a performance target. Everything x86_64 since AMD K10 (2007) and Intel IvyBridge (2012) is fine with using NOPL (as opposed to prefix NOP). And per FEATURE_NOPL being required for x86_64, all x86_64 CPUs can use NOPL. So stop caring about NOPs, simplify things and get on with life. [ The problem seems to be that some uarchs can only decode NOPL on a single front-end port while others have severe decode penalties for excessive prefixes. All modern uarchs can handle both, except Atom, which has prefix penalties. ] [ Also, much doubt you can actually measure any of this on normal workloads. ] After this, FEATURE_NOPL is unused except for required-features for x86_64. FEATURE_K8 is only used for PTI. [ bp: Kernel build measurements showed ~0.3s slowdown on Sandybridge which is hardly a slowdown. Get rid of X86_FEATURE_K7, while at it. 
] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Acked-by: Alexei Starovoitov # bpf Acked-by: Linus Torvalds Link: https://lkml.kernel.org/r/20210312115749.065275...@infradead.org --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/jump_label.h| 12 +-- arch/x86/include/asm/nops.h | 176 +++ arch/x86/include/asm/special_insns.h | 4 +- arch/x86/kernel/alternative.c| 198 ++ arch/x86/kernel/cpu/amd.c| 5 +- arch/x86/kernel/ftrace.c | 4 +- arch/x86/kernel/jump_label.c | 32 + arch/x86/kernel/kprobes/core.c | 2 +- arch/x86/kernel/setup.c | 1 +- arch/x86/kernel/static_call.c| 4 +- arch/x86/net/bpf_jit_comp.c | 8 +- 12 files changed, 97 insertions(+), 351 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index cc96e26..8afa318 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ /* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +/* FREE, was #define X86_FEATURE_K7( 3*32+ 5) "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 06c3cc2..5ce342b 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -6,12 +6,6 @@ #define JUMP_LABEL_NOP_SIZE 5 -#ifdef CONFIG_X86_64 -# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC -#else -# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC -#endif - #include #include @@ -23,7 +17,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" - ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" + ".byte " __stringify(BYTES_NOP5) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" ".long 1b - ., %l[l_yes] - . 
\n\t" @@ -63,7 +57,7 @@ l_yes: .long \target - .Lstatic_jump_after_\@ .Lstatic_jump_after_\@: .else - .byte STATIC_KEY_INIT_NOP + .byte BYTES_NOP5 .endif .pushsection __jump_table, "aw" _ASM_ALIGN @@ -75,7 +69,7 @@ l_yes: .macro STATIC_JUMP_IF_FALSE target, key, def .Lstatic_jump_\@: .if \def - .byte STATIC_KEY_INIT_NOP + .byte BYTES_NOP5 .else /* Equivalent to "jmp.d32 \target" */ .byte 0xe9 diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index 12f12b5..c1e5e81 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -4,89 +4,58 @@ /* * Define nops for use with alternative() and for tracing. - * - * *_NOP5_ATOMIC must be a single instruction. */ -#define NOP_DS_PREFIX 0x3e +#ifndef CONFIG_64BIT -/* generic versions from gas - 1: nop - the following instructions are
[tip: x86/cpu] objtool/x86: Use asm/nops.h
The following commit has been merged into the x86/cpu branch of tip: Commit-ID: 301cddc21a157a3072d789a3097857202e550a24 Gitweb: https://git.kernel.org/tip/301cddc21a157a3072d789a3097857202e550a24 Author:Peter Zijlstra AuthorDate:Fri, 12 Mar 2021 12:32:55 +01:00 Committer: Borislav Petkov CommitterDate: Mon, 15 Mar 2021 16:37:37 +01:00 objtool/x86: Use asm/nops.h Since the kernel will rely on a single canonical set of NOPs, make sure objtool uses the exact same ones. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210312115749.136357...@infradead.org --- tools/arch/x86/include/asm/nops.h | 81 ++- tools/objtool/arch/x86/decode.c | 13 +++-- tools/objtool/sync-check.sh | 1 +- 3 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 tools/arch/x86/include/asm/nops.h diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h new file mode 100644 index 000..c1e5e81 --- /dev/null +++ b/tools/arch/x86/include/asm/nops.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_NOPS_H +#define _ASM_X86_NOPS_H + +/* + * Define nops for use with alternative() and for tracing. + */ + +#ifndef CONFIG_64BIT + +/* + * Generic 32bit nops from GAS: + * + * 1: nop + * 2: movl %esi,%esi + * 3: leal 0x0(%esi),%esi + * 4: leal 0x0(%esi,%eiz,1),%esi + * 5: leal %ds:0x0(%esi,%eiz,1),%esi + * 6: leal 0x0(%esi),%esi + * 7: leal 0x0(%esi,%eiz,1),%esi + * 8: leal %ds:0x0(%esi,%eiz,1),%esi + * + * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2 + * nop instructions. 
+ */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x89,0xf6 +#define BYTES_NOP3 0x8d,0x76,0x00 +#define BYTES_NOP4 0x8d,0x74,0x26,0x00 +#define BYTES_NOP5 0x3e,BYTES_NOP4 +#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00 +#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x3e,BYTES_NOP7 + +#else + +/* + * Generic 64bit nops from GAS: + * + * 1: nop + * 2: osp nop + * 3: nopl (%eax) + * 4: nopl 0x00(%eax) + * 5: nopl 0x00(%eax,%eax,1) + * 6: osp nopl 0x00(%eax,%eax,1) + * 7: nopl 0x(%eax) + * 8: nopl 0x(%eax,%eax,1) + */ +#define BYTES_NOP1 0x90 +#define BYTES_NOP2 0x66,BYTES_NOP1 +#define BYTES_NOP3 0x0f,0x1f,0x00 +#define BYTES_NOP4 0x0f,0x1f,0x40,0x00 +#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00 +#define BYTES_NOP6 0x66,BYTES_NOP5 +#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00 +#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 + +#endif /* CONFIG_64BIT */ + +#ifdef __ASSEMBLY__ +#define _ASM_MK_NOP(x) .byte x +#else +#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" +#endif + +#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1) +#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2) +#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3) +#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4) +#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5) +#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6) +#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7) +#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8) + +#define ASM_NOP_MAX 8 + +#ifndef __ASSEMBLY__ +extern const unsigned char * const x86_nops[]; +#endif + +#endif /* _ASM_X86_NOPS_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 549813c..c117bfc 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -11,6 +11,9 @@ #include "../../../arch/x86/lib/inat.c" #include "../../../arch/x86/lib/insn.c" +#define CONFIG_64BIT 1 +#include + #include #include #include @@ -596,11 +599,11 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) const char *arch_nop_insn(int len) { static const char 
nops[5][5] = { - /* 1 */ { 0x90 }, - /* 2 */ { 0x66, 0x90 }, - /* 3 */ { 0x0f, 0x1f, 0x00 }, - /* 4 */ { 0x0f, 0x1f, 0x40, 0x00 }, - /* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 }, + { BYTES_NOP1 }, + { BYTES_NOP2 }, + { BYTES_NOP3 }, + { BYTES_NOP4 }, + { BYTES_NOP5 }, }; if (len < 1 || len > 5) { diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 606a4b5..d232686 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -10,6 +10,7 @@ FILES="include/linux/objtool.h" if [ "$SRCARCH" = "x86" ]; then FILES="$FILES +arch/x86/include/asm/nops.h arch/x86/include/asm/inat_types.h arch/x86/include/asm/orc_types.h arch/x86/include/asm/emulate_prefix.h
[tip: objtool/urgent] objtool,x86: Fix uaccess PUSHF/POPF validation
The following commit has been merged into the objtool/urgent branch of tip: Commit-ID: ba08abca66d46381df60842f64f70099d5482b92 Gitweb: https://git.kernel.org/tip/ba08abca66d46381df60842f64f70099d5482b92 Author:Peter Zijlstra AuthorDate:Mon, 08 Mar 2021 15:46:04 +01:00 Committer: Peter Zijlstra CommitterDate: Fri, 12 Mar 2021 09:15:49 +01:00 objtool,x86: Fix uaccess PUSHF/POPF validation Commit ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not use popf") replaced "push %reg; popf" with something like: "test $0x200, %reg; jz 1f; sti; 1:", which breaks the pushf/popf symmetry that commit ea24213d8088 ("objtool: Add UACCESS validation") relies on. The result is: drivers/gpu/drm/amd/amdgpu/si.o: warning: objtool: si_common_hw_init()+0xf36: PUSHF stack exhausted Meanwhile, commit c9c324dc22aa ("objtool: Support stack layout changes in alternatives") makes that we can actually use stack-ops in alternatives, which means we can revert 1ff865e343c2 ("x86,smap: Fix smap_{save,restore}() alternatives"). That in turn means we can limit the PUSHF/POPF handling of ea24213d8088 to those instructions that are in alternatives. 
Fixes: ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not use popf") Reported-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/yey4ribqya5fn...@hirez.programming.kicks-ass.net --- arch/x86/include/asm/smap.h | 10 -- tools/objtool/check.c | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 8b58d69..0bc9b08 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -58,9 +58,8 @@ static __always_inline unsigned long smap_save(void) unsigned long flags; asm volatile ("# smap_save\n\t" - ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) - "pushf; pop %0; " __ASM_CLAC "\n\t" - "1:" + ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t", + X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); return flags; @@ -69,9 +68,8 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { asm volatile ("# smap_restore\n\t" - ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) - "push %0; popf\n\t" - "1:" + ALTERNATIVE("", "push %0; popf\n\t", + X86_FEATURE_SMAP) : : "g" (flags) : "memory", "cc"); } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 068cdb4..5e5388a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2442,6 +2442,9 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state) if (update_cfi_state(insn, &state->cfi, op)) return 1; + if (!insn->alt_group) + continue; + if (op->dest.type == OP_DEST_PUSHF) { if (!state->uaccess_stack) { state->uaccess_stack = 1;
[tip: locking/urgent] u64_stats,lockdep: Fix u64_stats_init() vs lockdep
The following commit has been merged into the locking/urgent branch of tip: Commit-ID: d5b0e0677bfd5efd17c5bbb00156931f0d41cb85 Gitweb: https://git.kernel.org/tip/d5b0e0677bfd5efd17c5bbb00156931f0d41cb85 Author:Peter Zijlstra AuthorDate:Mon, 08 Mar 2021 09:38:12 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 10 Mar 2021 09:51:45 +01:00 u64_stats,lockdep: Fix u64_stats_init() vs lockdep Jakub reported that: static struct net_device *rtl8139_init_board(struct pci_dev *pdev) { ... u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); ... } results in lockdep getting confused between the RX and TX stats lock. This is because u64_stats_init() is an inline calling seqcount_init(), which is a macro using a static variable to generate a lockdep class. By wrapping that in an inline, we negate the effect of the macro and fold the static key variable, hence the confusion. Fix by also making u64_stats_init() a macro for the case where it matters, leaving the other case an inline for argument validation etc. Reported-by: Jakub Kicinski Debugged-by: "Ahmed S. Darwish" Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Erhard F." Link: https://lkml.kernel.org/r/yexicy6+9mksd...@hirez.programming.kicks-ass.net --- include/linux/u64_stats_sync.h | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index c6abb79..e81856c 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - seqcount_init(&syncp->seq); -#endif } +#endif static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) {
[tip: locking/urgent] seqlock,lockdep: Fix seqcount_latch_init()
The following commit has been merged into the locking/urgent branch of tip: Commit-ID: 4817a52b306136c8b2b2271d8770401441e4cf79 Gitweb: https://git.kernel.org/tip/4817a52b306136c8b2b2271d8770401441e4cf79 Author:Peter Zijlstra AuthorDate:Tue, 09 Mar 2021 15:21:18 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 10 Mar 2021 09:51:45 +01:00 seqlock,lockdep: Fix seqcount_latch_init() seqcount_init() must be a macro in order to preserve the static variable that is used for the lockdep key. Don't then wrap it in an inline function, which destroys that. Luckily there aren't many users of this function, but fix it before it becomes a problem. Fixes: 80793c3471d9 ("seqlock: Introduce seqcount_latch_t") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/yeefebnuvkzax...@hirez.programming.kicks-ass.net --- include/linux/seqlock.h | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 2f7bb92..f61e34f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -664,10 +664,7 @@ typedef struct { * seqcount_latch_init() - runtime initializer for seqcount_latch_t * @s: Pointer to the seqcount_latch_t instance */ -static inline void seqcount_latch_init(seqcount_latch_t *s) -{ - seqcount_init(&s->seqcount); -} +#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount) /** * raw_read_seqcount_latch() - pick even/odd latch data copy
[tip: x86/mm] smp: Micro-optimize smp_call_function_many_cond()
The following commit has been merged into the x86/mm branch of tip: Commit-ID: d43f17a1da25373580ebb466de7d0641acbf6fd6 Gitweb: https://git.kernel.org/tip/d43f17a1da25373580ebb466de7d0641acbf6fd6 Author:Peter Zijlstra AuthorDate:Tue, 02 Mar 2021 08:02:43 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 13:00:22 +01:00 smp: Micro-optimize smp_call_function_many_cond() Call the generic send_call_function_single_ipi() function, which will avoid the IPI when @last_cpu is idle. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/smp.c b/kernel/smp.c index b6375d7..af0d51d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -694,7 +694,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, * provided mask. */ if (nr_cpus == 1) - arch_send_call_function_single_ipi(last_cpu); + send_call_function_single_ipi(last_cpu); else if (likely(nr_cpus > 1)) arch_send_call_function_ipi_mask(cfd->cpumask_ipi); }
[tip: locking/core] static_call: Fix the module key fixup
The following commit has been merged into the locking/core branch of tip: Commit-ID: 50bf8080a94d171e843fc013abec19d8ab9f50ae Gitweb: https://git.kernel.org/tip/50bf8080a94d171e843fc013abec19d8ab9f50ae Author:Peter Zijlstra AuthorDate:Thu, 25 Feb 2021 23:03:51 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:49:08 +01:00 static_call: Fix the module key fixup Provided the target address of a R_X86_64_PC32 relocation is aligned, the low two bits should be invariant between the relative and absolute value. Turns out the address is not aligned and things go sideways, ensure we transfer the bits in the absolute form when fixing up the key address. Fixes: 73f44fe19d35 ("static_call: Allow module use without exposing static_call_key") Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Steven Rostedt (VMware) Link: https://lkml.kernel.org/r/20210225220351.ge4...@worktop.programming.kicks-ass.net --- kernel/static_call.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index 6906c6e..ae82529 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -349,7 +349,8 @@ static int static_call_add_module(struct module *mod) struct static_call_site *site; for (site = start; site != stop; site++) { - unsigned long addr = (unsigned long)static_call_key(site); + unsigned long s_key = (long)site->key + (long)&site->key; + unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; unsigned long key; /* @@ -373,8 +374,8 @@ static int static_call_add_module(struct module *mod) return -EINVAL; } - site->key = (key - (long)&site->key) | - (site->key & STATIC_CALL_SITE_FLAGS); + key |= s_key & STATIC_CALL_SITE_FLAGS; + site->key = key - (long)&site->key; } return __static_call_init(mod, start, stop);
[tip: objtool/core] objtool,x86: More ModRM sugar
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 36d92e43d01cbeeec99abdf405362243051d6b3f Gitweb: https://git.kernel.org/tip/36d92e43d01cbeeec99abdf405362243051d6b3f Author:Peter Zijlstra AuthorDate:Fri, 12 Feb 2021 09:13:00 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool,x86: More ModRM sugar Better helpers to decode ModRM. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/YCZB/ljatfxqq...@hirez.programming.kicks-ass.net --- tools/objtool/arch/x86/decode.c | 28 +--- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index b42e5ec..431bafb 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -82,15 +82,21 @@ unsigned long arch_jump_destination(struct instruction *insn) * 01 | [r/m + d8]|[S+d]| [r/m + d8] | * 10 | [r/m + d32] |[S+D]| [r/m + d32] | * 11 | r/ m | - * */ + +#define mod_is_mem() (modrm_mod != 3) +#define mod_is_reg() (modrm_mod == 3) + #define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) -#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3) +#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem()) #define rm_is(reg) (have_SIB() ? 
\ sib_base == (reg) && sib_index == CFI_SP : \ modrm_rm == (reg)) +#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) +#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg)) + int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -154,7 +160,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, case 0x1: case 0x29: - if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { + if (rex_w && rm_is_reg(CFI_SP)) { /* add/sub reg, %rsp */ ADD_OP(op) { @@ -219,7 +225,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; /* %rsp target only */ - if (!(modrm_mod == 3 && modrm_rm == CFI_SP)) + if (!rm_is_reg(CFI_SP)) break; imm = insn.immediate.value; @@ -272,7 +278,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (modrm_reg == CFI_SP) { - if (modrm_mod == 3) { + if (mod_is_reg()) { /* mov %rsp, reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; @@ -308,7 +314,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod == 3 && modrm_rm == CFI_SP) { + if (rm_is_reg(CFI_SP)) { /* mov reg, %rsp */ ADD_OP(op) { @@ -325,7 +331,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { + if (rm_is_mem(CFI_BP)) { /* mov reg, disp(%rbp) */ ADD_OP(op) { @@ -338,7 +344,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod != 3 && rm_is(CFI_SP)) { + if (rm_is_mem(CFI_SP)) { /* mov reg, disp(%rsp) */ ADD_OP(op) { @@ -357,7 +363,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { + if (rm_is_mem(CFI_BP)) { /* mov disp(%rbp), reg */ ADD_OP(op) { @@ 
-370,7 +376,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod != 3 && rm_is(CFI_SP)) { + if (rm_is_mem(CFI_SP)) { /* mov disp(%rsp), reg */ ADD_OP(op) { @@ -386,7 +392,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8d: - if (modrm_mod == 3) { + if (mod_is_reg()) {
[tip: objtool/core] objtool: Collate parse_options() users
The following commit has been merged into the objtool/core branch of tip: Commit-ID: a2f605f9ff57397d05a8e2f282b78a69f574d305 Gitweb: https://git.kernel.org/tip/a2f605f9ff57397d05a8e2f282b78a69f574d305 Author:Peter Zijlstra AuthorDate:Fri, 26 Feb 2021 11:18:24 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool: Collate parse_options() users Ensure there's a single place that parses check_options, in preparation for extending where to get options from. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210226110004.193108...@infradead.org --- tools/objtool/builtin-check.c | 14 +- tools/objtool/builtin-orc.c | 5 + tools/objtool/include/objtool/builtin.h | 2 ++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 97f063d..0399752 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -42,17 +42,21 @@ const struct option check_options[] = { OPT_END(), }; +int cmd_parse_options(int argc, const char **argv, const char * const usage[]) +{ + argc = parse_options(argc, argv, check_options, usage, 0); + if (argc != 1) + usage_with_options(usage, check_options); + return argc; +} + int cmd_check(int argc, const char **argv) { const char *objname; struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, check_usage, 0); - - if (argc != 1) - usage_with_options(check_usage, check_options); - + argc = cmd_parse_options(argc, argv, check_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 8273bbf..17f8b93 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv) struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, orc_usage, 0); - if (argc != 1) - 
usage_with_options(orc_usage, check_options); - + argc = cmd_parse_options(argc, argv, orc_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index d019210..15ac0b7 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -11,6 +11,8 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr, backup; +extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); + extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv);
[tip: objtool/core] objtool: Parse options from OBJTOOL_ARGS
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 900b4df347bbac4874149a226143a556909faba8 Gitweb: https://git.kernel.org/tip/900b4df347bbac4874149a226143a556909faba8 Author:Peter Zijlstra AuthorDate:Fri, 26 Feb 2021 11:32:30 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool: Parse options from OBJTOOL_ARGS Teach objtool to parse options from the OBJTOOL_ARGS environment variable. This enables things like: $ OBJTOOL_ARGS="--backup" make O=defconfig-build/ kernel/ponies.o to obtain both defconfig-build/kernel/ponies.o{,.orig} and easily inspect what objtool actually did. Suggested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210226110004.252553...@infradead.org --- tools/objtool/builtin-check.c | 25 + 1 file changed, 25 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 0399752..8b38b5d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -26,6 +27,11 @@ static const char * const check_usage[] = { NULL, }; +static const char * const env_usage[] = { + "OBJTOOL_ARGS=\"\"", + NULL, +}; + const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", _fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", _unreachable, "Skip 'unreachable instruction' warnings"), @@ -44,6 +50,25 @@ const struct option check_options[] = { int cmd_parse_options(int argc, const char **argv, const char * const usage[]) { + const char *envv[16] = { }; + char *env; + int envc; + + env = getenv("OBJTOOL_ARGS"); + if (env) { + envv[0] = "OBJTOOL_ARGS"; + for (envc = 1; envc < ARRAY_SIZE(envv); ) { + envv[envc++] = env; + env = strchr(env, ' '); + if (!env) + break; + *env = '\0'; + env++; + } + + parse_options(envc, envv, check_options, env_usage, 0); + } + argc = 
parse_options(argc, argv, check_options, usage, 0); if (argc != 1) usage_with_options(usage, check_options);
[tip: objtool/core] objtool,x86: Rewrite LEA decode
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 2ee0c363492f1acc1082125218e6a80c0d7d502b Gitweb: https://git.kernel.org/tip/2ee0c363492f1acc1082125218e6a80c0d7d502b Author:Peter Zijlstra AuthorDate:Tue, 09 Feb 2021 21:29:16 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool,x86: Rewrite LEA decode Current LEA decoding is a bunch of special cases, properly decode the instruction, with exception of full SIB and RIP-relative modes. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.143250...@infradead.org --- tools/objtool/arch/x86/decode.c | 86 ++-- 1 file changed, 28 insertions(+), 58 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 549813c..d8f0138 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -91,9 +91,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, { struct insn insn; int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, - rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, - modrm_reg = 0, sib = 0; + unsigned char op1, op2, + rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, + modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, + sib = 0; struct stack_op *op = NULL; struct symbol *sym; @@ -328,68 +329,37 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8d: - if (sib == 0x24 && rex_w && !rex_b && !rex_x) { - - ADD_OP(op) { - if (!insn.displacement.value) { - /* lea (%rsp), reg */ - op->src.type = OP_SRC_REG; - } else { - /* lea disp(%rsp), reg */ - op->src.type = OP_SRC_ADD; - op->src.offset = insn.displacement.value; - } - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; - } - - } else if (rex == 0x48 && modrm == 0x65) { - 
- /* lea disp(%rbp), %rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_BP; - op->src.offset = insn.displacement.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + if (modrm_mod == 3) { + WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset); + break; + } - } else if (rex == 0x49 && modrm == 0x62 && - insn.displacement.value == -8) { + /* skip non 64bit ops */ + if (!rex_w) + break; - /* -* lea -0x8(%r10), %rsp -* -* Restoring rsp back to its original value after a -* stack realignment. -*/ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R10; - op->src.offset = -8; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + /* skip nontrivial SIB */ + if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) + break; - } else if (rex == 0x49 && modrm == 0x65 && - insn.displacement.value == -16) { + /* skip RIP relative displacement */ + if (modrm_rm == 5 && modrm_mod == 0) + break; - /* -* lea -0x10(%r13), %rsp -* -* Restoring rsp back to its original value after a -* stack realignment. -*/ - ADD_OP(op) { + /* lea disp(%src), %dst */ + ADD_OP(op) { + op->src.offset = insn.displacement.value; + if (!op->src.offset) { +
[tip: objtool/core] objtool,x86: Rewrite LEAVE
The following commit has been merged into the objtool/core branch of tip: Commit-ID: ffc7e74f36a2c7424da262a32a0bbe59669677ef Gitweb: https://git.kernel.org/tip/ffc7e74f36a2c7424da262a32a0bbe59669677ef Author:Peter Zijlstra AuthorDate:Tue, 09 Feb 2021 21:41:13 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool,x86: Rewrite LEAVE Since we can now have multiple stack-ops per instruction, we don't need to special case LEAVE and can simply emit the composite operations. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.253273...@infradead.org --- tools/objtool/arch/x86/decode.c | 14 +++--- tools/objtool/check.c| 24 ++-- tools/objtool/include/objtool/arch.h | 1 - 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index d8f0138..47b9acf 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -446,9 +446,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, * mov bp, sp * pop bp */ - ADD_OP(op) - op->dest.type = OP_DEST_LEAVE; - + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_BP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + ADD_OP(op) { + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_BP; + } break; case 0xe3: diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 12b8f0f..a0f762a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2020,7 +2020,7 @@ static int update_cfi_state(struct instruction *insn, } else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && -cfa->base == CFI_BP) { +(cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) { /* * mov %rbp, %rsp @@ -2217,7 +2217,7 @@ static int update_cfi_state(struct instruction *insn, cfa->offset = 0; cfi->drap_offset = -1; - } else if 
(regs[op->dest.reg].offset == -cfi->stack_size) { + } else if (cfi->stack_size == -regs[op->dest.reg].offset) { /* pop %reg */ restore_reg(cfi, op->dest.reg); @@ -2358,26 +2358,6 @@ static int update_cfi_state(struct instruction *insn, break; - case OP_DEST_LEAVE: - if ((!cfi->drap && cfa->base != CFI_BP) || - (cfi->drap && cfa->base != cfi->drap_reg)) { - WARN_FUNC("leave instruction with modified stack frame", - insn->sec, insn->offset); - return -1; - } - - /* leave (mov %rbp, %rsp; pop %rbp) */ - - cfi->stack_size = -cfi->regs[CFI_BP].offset - 8; - restore_reg(cfi, CFI_BP); - - if (!cfi->drap) { - cfa->base = CFI_SP; - cfa->offset -= 8; - } - - break; - case OP_DEST_MEM: if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) { WARN_FUNC("unknown stack-related memory operation", diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 6ff0685..ff21f38 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -35,7 +35,6 @@ enum op_dest_type { OP_DEST_MEM, OP_DEST_PUSH, OP_DEST_PUSHF, - OP_DEST_LEAVE, }; struct op_dest {
[tip: objtool/core] objtool,x86: Renumber CFI_reg
The following commit has been merged into the objtool/core branch of tip: Commit-ID: d473b18b2ef62563fb874f9cae6e123f99129e3f Gitweb: https://git.kernel.org/tip/d473b18b2ef62563fb874f9cae6e123f99129e3f Author: Peter Zijlstra AuthorDate: Tue, 09 Feb 2021 20:18:21 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:22 +01:00 objtool,x86: Renumber CFI_reg Make them match the instruction encoding numbering. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.033720...@infradead.org --- tools/objtool/arch/x86/include/arch/cfi_regs.h | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h index 79bc517..0579d22 100644 --- a/tools/objtool/arch/x86/include/arch/cfi_regs.h +++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h @@ -4,13 +4,13 @@ #define _OBJTOOL_CFI_REGS_H #define CFI_AX 0 -#define CFI_DX 1 -#define CFI_CX 2 +#define CFI_CX 1 +#define CFI_DX 2 #define CFI_BX 3 -#define CFI_SI 4 -#define CFI_DI 5 -#define CFI_BP 6 -#define CFI_SP 7 +#define CFI_SP 4 +#define CFI_BP 5 +#define CFI_SI 6 +#define CFI_DI 7 #define CFI_R8 8 #define CFI_R9 9 #define CFI_R10 10
[tip: objtool/core] objtool: Allow UNWIND_HINT to suppress dodgy stack modifications
The following commit has been merged into the objtool/core branch of tip: Commit-ID: d54dba41999498b38a40940e1123019d50b26496 Gitweb: https://git.kernel.org/tip/d54dba41999498b38a40940e1123019d50b26496 Author: Peter Zijlstra AuthorDate: Thu, 11 Feb 2021 13:03:28 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:22 +01:00 objtool: Allow UNWIND_HINT to suppress dodgy stack modifications rewind_stack_do_exit() UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS call do_exit Does unspeakable things to the stack, which objtool currently fails to detect due to a limitation in instruction decoding. This will be rectified after which the above will result in: arch/x86/entry/entry_64.o: warning: objtool: .text+0xab: unsupported stack register modification Allow the UNWIND_HINT on the next instruction to suppress this, it will overwrite the state anyway.
Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173626.918498...@infradead.org --- tools/objtool/check.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 068cdb4..12b8f0f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1959,8 +1959,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg) * 41 5d pop %r13 * c3 retq */ -static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, -struct stack_op *op) +static int update_cfi_state(struct instruction *insn, + struct instruction *next_insn, + struct cfi_state *cfi, struct stack_op *op) { struct cfi_reg *cfa = &cfi->cfa; struct cfi_reg *regs = cfi->regs; @@ -2161,7 +2162,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; } - if (op->dest.reg == cfi->cfa.base) { + if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) { WARN_FUNC("unsupported stack register modification", insn->sec, insn->offset); return -1; @@ -2433,13 +2434,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn return 0; } -static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +static int handle_insn_ops(struct instruction *insn, + struct instruction *next_insn, + struct insn_state *state) { struct stack_op *op; list_for_each_entry(op, &insn->stack_ops, list) { - if (update_cfi_state(insn, &state->cfi, op)) + if (update_cfi_state(insn, next_insn, &state->cfi, op)) return 1; if (op->dest.type == OP_DEST_PUSHF) { @@ -2719,7 +2722,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; } - if (handle_insn_ops(insn, &state)) + if (handle_insn_ops(insn, next_insn, &state)) return 1; switch (insn->type) {
[tip: objtool/core] objtool,x86: Simplify register decode
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 16ef7f159c503c7befec7018ee0e82fdc311721e Gitweb: https://git.kernel.org/tip/16ef7f159c503c7befec7018ee0e82fdc311721e Author:Peter Zijlstra AuthorDate:Tue, 09 Feb 2021 19:59:43 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool,x86: Simplify register decode Since the CFI_reg number now matches the instruction encoding order do away with the op_to_cfi_reg[] and use direct assignment. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.362004...@infradead.org --- tools/objtool/arch/x86/decode.c | 79 +++- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 47b9acf..5ce7dc4 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -17,17 +17,6 @@ #include #include -static unsigned char op_to_cfi_reg[][2] = { - {CFI_AX, CFI_R8}, - {CFI_CX, CFI_R9}, - {CFI_DX, CFI_R10}, - {CFI_BX, CFI_R11}, - {CFI_SP, CFI_R12}, - {CFI_BP, CFI_R13}, - {CFI_SI, CFI_R14}, - {CFI_DI, CFI_R15}, -}; - static int is_x86_64(const struct elf *elf) { switch (elf->ehdr.e_machine) { @@ -94,7 +83,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, - sib = 0; + sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */; struct stack_op *op = NULL; struct symbol *sym; @@ -130,23 +119,29 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (insn.modrm.nbytes) { modrm = insn.modrm.bytes[0]; modrm_mod = X86_MODRM_MOD(modrm); - modrm_reg = X86_MODRM_REG(modrm); - modrm_rm = X86_MODRM_RM(modrm); + modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r; + modrm_rm = X86_MODRM_RM(modrm) + 
8*rex_b; } - if (insn.sib.nbytes) + if (insn.sib.nbytes) { sib = insn.sib.bytes[0]; + /* + sib_scale = X86_SIB_SCALE(sib); + sib_index = X86_SIB_INDEX(sib) + 8*rex_x; + sib_base = X86_SIB_BASE(sib) + 8*rex_b; +*/ + } switch (op1) { case 0x1: case 0x29: - if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { + if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { /* add/sub reg, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_ADD; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } @@ -158,7 +153,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* push reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->src.reg = (op1 & 0x7) + 8*rex_b; op->dest.type = OP_DEST_PUSH; } @@ -170,7 +165,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, ADD_OP(op) { op->src.type = OP_SRC_POP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->dest.reg = (op1 & 0x7) + 8*rex_b; } break; @@ -223,7 +218,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && !rex_r && modrm_reg == 4) { + if (rex_w && modrm_reg == CFI_SP) { if (modrm_mod == 3) { /* mov %rsp, reg */ @@ -231,17 +226,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.type = OP_SRC_REG; op->src.reg = CFI_SP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.reg = modrm_rm; }
[tip: objtool/core] objtool,x86: Rewrite ADD/SUB/AND
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 961d83b9073b1ce5834af50d3c69e5e2461c6fd3 Gitweb: https://git.kernel.org/tip/961d83b9073b1ce5834af50d3c69e5e2461c6fd3 Author:Peter Zijlstra AuthorDate:Wed, 10 Feb 2021 14:11:30 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool,x86: Rewrite ADD/SUB/AND Support sign extending and imm8 forms. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.588366...@infradead.org --- tools/objtool/arch/x86/decode.c | 70 +++- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 78ae5be..b42e5ec 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -98,13 +98,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, struct list_head *ops_list) { struct insn insn; - int x86_64, sign; + int x86_64; unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; struct stack_op *op = NULL; struct symbol *sym; + u64 imm; x86_64 = is_x86_64(elf); if (x86_64 == -1) @@ -200,12 +201,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, *type = INSN_JUMP_CONDITIONAL; break; - case 0x81: - case 0x83: - if (rex != 0x48) + case 0x80 ... 
0x83: + /* +* 1000 00sw : mod OP r/m : immediate +* +* s - sign extend immediate +* w - imm8 / imm32 +* +* OP: 000 ADD100 AND +* 001 OR 101 SUB +* 010 ADC110 XOR +* 011 SBB111 CMP +*/ + + /* 64bit only */ + if (!rex_w) break; - if (modrm == 0xe4) { + /* %rsp target only */ + if (!(modrm_mod == 3 && modrm_rm == CFI_SP)) + break; + + imm = insn.immediate.value; + if (op1 & 2) { /* sign extend */ + if (op1 & 1) { /* imm32 */ + imm <<= 32; + imm = (s64)imm >> 32; + } else { /* imm8 */ + imm <<= 56; + imm = (s64)imm >> 56; + } + } + + switch (modrm_reg & 7) { + case 5: + imm = -imm; + /* fallthrough */ + case 0: + /* add/sub imm, %rsp */ + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = imm; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + break; + + case 4: /* and imm, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_AND; @@ -215,23 +258,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_SP; } break; - } - if (modrm == 0xc4) - sign = 1; - else if (modrm == 0xec) - sign = -1; - else + default: + /* WARN ? */ break; - - /* add/sub imm, %rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_SP; - op->src.offset = insn.immediate.value * sign; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; } + break; case 0x89:
[tip: objtool/core] objtool,x86: Support %riz encodings
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 78df6245c3c82484200b9f8e306dc86fb19e9c02 Gitweb: https://git.kernel.org/tip/78df6245c3c82484200b9f8e306dc86fb19e9c02 Author:Peter Zijlstra AuthorDate:Wed, 10 Feb 2021 11:47:35 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool,x86: Support %riz encodings When there's a SIB byte, the register otherwise denoted by r/m will then be denoted by SIB.base REX.b will now extend this. SIB.index == SP is magic and notes an index value zero. This means that there's a bunch of alternative (longer) encodings for the same thing. Eg. 'ModRM.mod != 3, ModRM.r/m = AX' can be encoded as 'ModRM.mod != 3, ModRM.r/m = SP, SIB.base = AX, SIB.index = SP' which is actually 4 different encodings because the value of SIB.scale is irrelevant, giving rise to 5 different but equal encodings. Support these encodings and clean up the SIB handling in general. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.472967...@infradead.org --- tools/objtool/arch/x86/decode.c | 67 ++-- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 5ce7dc4..78ae5be 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -72,6 +72,25 @@ unsigned long arch_jump_destination(struct instruction *insn) return -1; \ else for (list_add_tail(>list, ops_list); op; op = NULL) +/* + * Helpers to decode ModRM/SIB: + * + * r/m| AX CX DX BX | SP | BP | SI DI | + *| R8 R9 R10 R11 | R12 | R13 | R14 R15 | + * Mod++-+-+-+ + * 00 |[r/m] |[SIB]|[IP+]| [r/m] | + * 01 | [r/m + d8]|[S+d]| [r/m + d8] | + * 10 | [r/m + d32] |[S+D]| [r/m + d32] | + * 11 | r/ m | + * + */ +#define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) +#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3) + 
+#define rm_is(reg) (have_SIB() ? \ + sib_base == (reg) && sib_index == CFI_SP : \ + modrm_rm == (reg)) + int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -83,7 +102,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, - sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */; + sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; struct stack_op *op = NULL; struct symbol *sym; @@ -125,11 +144,9 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (insn.sib.nbytes) { sib = insn.sib.bytes[0]; - /* - sib_scale = X86_SIB_SCALE(sib); + /* sib_scale = X86_SIB_SCALE(sib); */ sib_index = X86_SIB_INDEX(sib) + 8*rex_x; sib_base = X86_SIB_BASE(sib) + 8*rex_b; -*/ } switch (op1) { @@ -218,7 +235,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && modrm_reg == CFI_SP) { + if (!rex_w) + break; + + if (modrm_reg == CFI_SP) { if (modrm_mod == 3) { /* mov %rsp, reg */ @@ -231,14 +251,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } else { - /* skip nontrivial SIB */ - if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x)) - break; - /* skip RIP relative displacement */ - if ((modrm_rm & 7) == 5 && modrm_mod == 0) + if (is_RIP()) break; + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } + /* mov %rsp, disp(%reg) */ ADD_OP(op) {
[tip: objtool/core] objtool: Add --backup
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 8ad15c6900840e8a2163012f4581c52127622e02 Gitweb: https://git.kernel.org/tip/8ad15c6900840e8a2163012f4581c52127622e02 Author:Peter Zijlstra AuthorDate:Fri, 26 Feb 2021 10:59:59 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00 objtool: Add --backup Teach objtool to write backups files, such that it becomes easier to see what objtool did to the object file. Backup files will be ${name}.orig. Suggested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/yd4obt3aoxpwl...@hirez.programming.kicks-ass.net --- tools/objtool/builtin-check.c | 4 +- tools/objtool/include/objtool/builtin.h | 3 +- tools/objtool/objtool.c | 64 - 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c3a85d8..97f063d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -18,7 +18,8 @@ #include #include -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, backup; static const char * const check_usage[] = { "objtool check [] file.o", @@ -37,6 +38,7 @@ const struct option check_options[] = { OPT_BOOLEAN('n', "noinstr", , "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", , "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", , "generate __mcount_loc"), + OPT_BOOLEAN('B', "backup", , "create .orig files before modification"), OPT_END(), }; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 2502bb2..d019210 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -8,7 +8,8 @@ #include extern 
const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +validate_dup, vmlinux, mcount, noinstr, backup; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 7b97ce4..43c1836 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,64 @@ bool help; const char *objname; static struct objtool_file file; +static bool objtool_create_backup(const char *_objname) +{ + int len = strlen(_objname); + char *buf, *base, *name = malloc(len+6); + int s, d, l, t; + + if (!name) { + perror("failed backup name malloc"); + return false; + } + + strcpy(name, _objname); + strcpy(name + len, ".orig"); + + d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644); + if (d < 0) { + perror("failed to create backup file"); + return false; + } + + s = open(_objname, O_RDONLY); + if (s < 0) { + perror("failed to open orig file"); + return false; + } + + buf = malloc(4096); + if (!buf) { + perror("failed backup data malloc"); + return false; + } + + while ((l = read(s, buf, 4096)) > 0) { + base = buf; + do { + t = write(d, base, l); + if (t < 0) { + perror("failed backup write"); + return false; + } + base += t; + l -= t; + } while (l); + } + + if (l < 0) { + perror("failed backup read"); + return false; + } + + free(name); + free(buf); + close(d); + close(s); + + return true; +} + struct objtool_file *objtool_open_read(const char *_objname) { if (objname) { @@ -59,6 +118,11 @@ struct objtool_file *objtool_open_read(const char *_objname) if (!file.elf) return NULL; + if (backup && !objtool_create_backup(objname)) { + WARN("can't create backup file"); + return NULL; + } + INIT_LIST_HEAD(_list); 
hash_init(file.insn_hash); INIT_LIST_HEAD(_call_list);
[tip: sched/core] sched: Simplify set_affinity_pending refcounts
The following commit has been merged into the sched/core branch of tip: Commit-ID: 50caf9c14b1498c90cf808dbba2ca29bd32ccba4 Gitweb: https://git.kernel.org/tip/50caf9c14b1498c90cf808dbba2ca29bd32ccba4 Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:42:08 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00 sched: Simplify set_affinity_pending refcounts Now that we have set_affinity_pending::stop_pending to indicate if a stopper is in progress, and we have the guarantee that if that stopper exists, it will (eventually) complete our @pending we can simplify the refcount scheme by no longer counting the stopper thread. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.724130...@infradead.org --- kernel/sched/core.c | 32 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4e4d100..9819121 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1862,6 +1862,10 @@ struct migration_arg { struct set_affinity_pending *pending; }; +/* + * @refs: number of wait_for_completion() + * @stop_pending: is @stop_work in use + */ struct set_affinity_pending { refcount_t refs; unsigned intstop_pending; @@ -1997,10 +2001,6 @@ out: if (complete) complete_all(>done); - /* For pending->{arg,stop_work} */ - if (pending && refcount_dec_and_test(>refs)) - wake_up_var(>refs); - return 0; } @@ -2199,12 +2199,16 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag push_task = get_task_struct(p); } + /* +* If there are pending waiters, but no pending stop_work, +* then complete now. 
+*/ pending = p->migration_pending; - if (pending) { - refcount_inc(>refs); + if (pending && !pending->stop_pending) { p->migration_pending = NULL; complete = true; } + task_rq_unlock(rq, p, rf); if (push_task) { @@ -2213,7 +2217,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag } if (complete) - goto do_complete; + complete_all(>done); return 0; } @@ -2264,9 +2268,9 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag if (!stop_pending) pending->stop_pending = true; - refcount_inc(>refs); /* pending->{arg,stop_work} */ if (flags & SCA_MIGRATE_ENABLE) p->migration_flags &= ~MDF_PUSH; + task_rq_unlock(rq, p, rf); if (!stop_pending) { @@ -2282,12 +2286,13 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag if (task_on_rq_queued(p)) rq = move_queued_task(rq, rf, p, dest_cpu); - p->migration_pending = NULL; - complete = true; + if (!pending->stop_pending) { + p->migration_pending = NULL; + complete = true; + } } task_rq_unlock(rq, p, rf); -do_complete: if (complete) complete_all(>done); } @@ -2295,7 +2300,7 @@ do_complete: wait_for_completion(>done); if (refcount_dec_and_test(>refs)) - wake_up_var(>refs); + wake_up_var(>refs); /* No UaF, just an address */ /* * Block the original owner of until all subsequent callers @@ -2303,6 +2308,9 @@ do_complete: */ wait_var_event(_pending.refs, !refcount_read(_pending.refs)); + /* ARGH */ + WARN_ON_ONCE(my_pending.stop_pending); + return 0; }
[tip: sched/core] sched: Collate affine_move_task() stoppers
The following commit has been merged into the sched/core branch of tip: Commit-ID: 58b1a45086b5f80f2b2842aa7ed0da51a64a302b Gitweb: https://git.kernel.org/tip/58b1a45086b5f80f2b2842aa7ed0da51a64a302b Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:15:23 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00 sched: Collate affine_move_task() stoppers The SCA_MIGRATE_ENABLE and task_running() cases are almost identical, collapse them to avoid further duplication. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.500108...@infradead.org --- kernel/sched/core.c | 23 --- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 088e8f4..84b657f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2239,30 +2239,23 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag return -EINVAL; } - if (flags & SCA_MIGRATE_ENABLE) { - - refcount_inc(>refs); /* pending->{arg,stop_work} */ - p->migration_flags &= ~MDF_PUSH; - task_rq_unlock(rq, p, rf); - - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - >arg, >stop_work); - - return 0; - } - if (task_running(rq, p) || p->state == TASK_WAKING) { /* -* Lessen races (and headaches) by delegating -* is_migration_disabled(p) checks to the stopper, which will -* run on the same CPU as said p. +* MIGRATE_ENABLE gets here because 'p == current', but for +* anything else we cannot do is_migration_disabled(), punt +* and have the stopper function handle it all race-free. 
*/ + refcount_inc(>refs); /* pending->{arg,stop_work} */ + if (flags & SCA_MIGRATE_ENABLE) + p->migration_flags &= ~MDF_PUSH; task_rq_unlock(rq, p, rf); stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, >arg, >stop_work); + if (flags & SCA_MIGRATE_ENABLE) + return 0; } else { if (!is_migration_disabled(p)) {
[tip: sched/core] sched: Simplify migration_cpu_stop()
The following commit has been merged into the sched/core branch of tip: Commit-ID: c20cf065d4a619d394d23290093b1002e27dff86 Gitweb: https://git.kernel.org/tip/c20cf065d4a619d394d23290093b1002e27dff86 Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:50:39 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:40:20 +01:00 sched: Simplify migration_cpu_stop() When affine_move_task() issues a migration_cpu_stop(), the purpose of that function is to complete that @pending, not any random other p->migration_pending that might have gotten installed since. This realization much simplifies migration_cpu_stop() and allows further necessary steps to fix all this as it provides the guarantee that @pending's stopper will complete @pending (and not some random other @pending). Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.430014...@infradead.org --- kernel/sched/core.c | 56 ++-- 1 file changed, 8 insertions(+), 48 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 79ddba5..088e8f4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, */ static int migration_cpu_stop(void *data) { - struct set_affinity_pending *pending; struct migration_arg *arg = data; + struct set_affinity_pending *pending = arg->pending; struct task_struct *p = arg->task; int dest_cpu = arg->dest_cpu; struct rq *rq = this_rq(); @@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data) raw_spin_lock(>pi_lock); rq_lock(rq, ); - pending = p->migration_pending; - if (pending && !arg->pending) { - /* -* This happens from sched_exec() and migrate_task_to(), -* neither of them care about pending and just want a task to -* maybe move about. 
-* -* Even if there is a pending, we can ignore it, since -* affine_move_task() will have it's own stop_work's in flight -* which will manage the completion. -* -* Notably, pending doesn't need to match arg->pending. This can -* happen when tripple concurrent affine_move_task() first sets -* pending, then clears pending and eventually sets another -* pending. -*/ - pending = NULL; - } - /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data) goto out; if (pending) { - p->migration_pending = NULL; + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true; } - /* migrate_enable() -- we must not race against SCA */ - if (dest_cpu < 0) { - /* -* When this was migrate_enable() but we no longer -* have a @pending, a concurrent SCA 'fixed' things -* and we should be valid again. Nothing to do. -*/ - if (!pending) { - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), >cpus_mask)); - goto out; - } - + if (dest_cpu < 0) dest_cpu = cpumask_any_distribute(>cpus_mask); - } if (task_on_rq_queued(p)) rq = __migrate_task(rq, , p, dest_cpu); else p->wake_cpu = dest_cpu; - } else if (dest_cpu < 0 || pending) { + } else if (pending) { /* * This happens when we get migrated between migrate_enable()'s * preempt_enable() and scheduling the stopper task. At that @@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data) * ->pi_lock, so the allowed mask is stable - if it got * somewhere allowed, we're done. */ - if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { - p->migration_pending = NULL; + if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true;
[tip: sched/core] sched: Optimize migration_cpu_stop()
The following commit has been merged into the sched/core branch of tip: Commit-ID: 3f1bc119cd7fc987c8ed25ffb717f99403bb308c Gitweb: https://git.kernel.org/tip/3f1bc119cd7fc987c8ed25ffb717f99403bb308c Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:21:35 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00 sched: Optimize migration_cpu_stop() When the purpose of migration_cpu_stop() is to migrate the task to 'any' valid CPU, don't migrate the task when it's already running on a valid CPU. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.569238...@infradead.org --- kernel/sched/core.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 84b657f..ac05afb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1936,14 +1936,25 @@ static int migration_cpu_stop(void *data) complete = true; } - if (dest_cpu < 0) + if (dest_cpu < 0) { + if (cpumask_test_cpu(task_cpu(p), >cpus_mask)) + goto out; + dest_cpu = cpumask_any_distribute(>cpus_mask); + } if (task_on_rq_queued(p)) rq = __migrate_task(rq, , p, dest_cpu); else p->wake_cpu = dest_cpu; + /* +* XXX __migrate_task() can fail, at which point we might end +* up running on a dodgy CPU, AFAICT this can only happen +* during CPU hotplug, at which point we'll get pushed out +* anyway, so it's probably not a big deal. +*/ + } else if (pending) { /* * This happens when we get migrated between migrate_enable()'s
[tip: sched/core] sched: Fix migration_cpu_stop() requeueing
The following commit has been merged into the sched/core branch of tip: Commit-ID: 8a6edb5257e2a84720fe78cb179eca58ba76126f Gitweb: https://git.kernel.org/tip/8a6edb5257e2a84720fe78cb179eca58ba76126f Author:Peter Zijlstra AuthorDate:Sat, 13 Feb 2021 13:10:35 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:40:20 +01:00 sched: Fix migration_cpu_stop() requeueing When affine_move_task(p) is called on a running task @p, which is not otherwise already changing affinity, we'll first set p->migration_pending and then do: stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); This then gets us to migration_cpu_stop() running on the CPU that was previously running our victim task @p. If we find that our task is no longer on that runqueue (this can happen because of a concurrent migration due to load-balance etc.), then we'll end up at the: } else if (dest_cpu < 0 || pending) { branch. Which we'll take because we set pending earlier. Here we first check if the task @p has already satisfied the affinity constraints, if so we bail early [A]. Otherwise we'll reissue migration_cpu_stop() onto the CPU that is now hosting our task @p: stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, &pending->arg, &pending->stop_work); Except, we've never initialized pending->arg, which will be all 0s. This then results in running migration_cpu_stop() on the next CPU with arg->task == NULL, which gives the by now obvious result of fireworks. The cure is to change affine_move_task() to always use pending->arg, furthermore we can use the exact same pattern as the SCA_MIGRATE_ENABLE case, since we'll block on the pending->done completion anyway, no point in adding yet another completion in stop_one_cpu(). This then gives a clear distinction between the two migration_cpu_stop() use cases: - sched_exec() / migrate_task_to() : arg->pending == NULL - affine_move_task() : arg->pending != NULL; And we can have it ignore p->migration_pending when !arg->pending. 
Any stop work from sched_exec() / migrate_task_to() is in addition to stop works from affine_move_task(), which will be sufficient to issue the completion. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.357743...@infradead.org --- kernel/sched/core.c | 39 --- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ca2bb62..79ddba5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1922,6 +1922,24 @@ static int migration_cpu_stop(void *data) rq_lock(rq, ); pending = p->migration_pending; + if (pending && !arg->pending) { + /* +* This happens from sched_exec() and migrate_task_to(), +* neither of them care about pending and just want a task to +* maybe move about. +* +* Even if there is a pending, we can ignore it, since +* affine_move_task() will have it's own stop_work's in flight +* which will manage the completion. +* +* Notably, pending doesn't need to match arg->pending. This can +* happen when tripple concurrent affine_move_task() first sets +* pending, then clears pending and eventually sets another +* pending. +*/ + pending = NULL; + } + /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -2194,10 +2212,6 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag int dest_cpu, unsigned int flags) { struct set_affinity_pending my_pending = { }, *pending = NULL; - struct migration_arg arg = { - .task = p, - .dest_cpu = dest_cpu, - }; bool complete = false; /* Can the task run on the task's current CPU? 
If so, we're done */ @@ -2235,6 +2249,12 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag /* Install the request */ refcount_set(_pending.refs, 1); init_completion(_pending.done); + my_pending.arg = (struct migration_arg) { + .task = p, + .dest_cpu = -1, /* any */ + .pending = _pending, + }; + p->migration_pending = _pending; } else { pending = p->migration_pending; @@ -2265,12 +2285,6 @@
[tip: sched/core] sched: Fix affine_move_task() self-concurrency
The following commit has been merged into the sched/core branch of tip: Commit-ID: 9e81889c7648d48dd5fe13f41cbc99f3c362484a Gitweb: https://git.kernel.org/tip/9e81889c7648d48dd5fe13f41cbc99f3c362484a Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:31:09 +01:00 Committer: Ingo Molnar CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00 sched: Fix affine_move_task() self-concurrency Consider: sched_setaffinity(p, X); sched_setaffinity(p, Y); Then the first will install p->migration_pending = &my_pending; and issue stop_one_cpu_nowait(pending); and the second one will read p->migration_pending and _also_ issue: stop_one_cpu_nowait(pending), the _SAME_ @pending. This causes stopper list corruption. Add set_affinity_pending::stop_pending, to indicate if a stopper is in progress. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.649146...@infradead.org --- kernel/sched/core.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ac05afb..4e4d100 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1864,6 +1864,7 @@ struct migration_arg { struct set_affinity_pending { refcount_t refs; + unsigned intstop_pending; struct completion done; struct cpu_stop_workstop_work; struct migration_argarg; @@ -1982,12 +1983,15 @@ static int migration_cpu_stop(void *data) * determine is_migration_disabled() and so have to chase after * it. 
*/ + WARN_ON_ONCE(!pending->stop_pending); task_rq_unlock(rq, p, ); stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, >arg, >stop_work); return 0; } out: + if (pending) + pending->stop_pending = false; task_rq_unlock(rq, p, ); if (complete) @@ -2183,7 +2187,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag int dest_cpu, unsigned int flags) { struct set_affinity_pending my_pending = { }, *pending = NULL; - bool complete = false; + bool stop_pending, complete = false; /* Can the task run on the task's current CPU? If so, we're done */ if (cpumask_test_cpu(task_cpu(p), >cpus_mask)) { @@ -2256,14 +2260,19 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag * anything else we cannot do is_migration_disabled(), punt * and have the stopper function handle it all race-free. */ + stop_pending = pending->stop_pending; + if (!stop_pending) + pending->stop_pending = true; refcount_inc(>refs); /* pending->{arg,stop_work} */ if (flags & SCA_MIGRATE_ENABLE) p->migration_flags &= ~MDF_PUSH; task_rq_unlock(rq, p, rf); - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - >arg, >stop_work); + if (!stop_pending) { + stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, + >arg, >stop_work); + } if (flags & SCA_MIGRATE_ENABLE) return 0;
[tip: objtool/core] objtool,x86: Rewrite LEA decode
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 7ee93978f459ace4e0fe30af582d343d5fb6421a Gitweb: https://git.kernel.org/tip/7ee93978f459ace4e0fe30af582d343d5fb6421a Author:Peter Zijlstra AuthorDate:Tue, 09 Feb 2021 21:29:16 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:29 +01:00 objtool,x86: Rewrite LEA decode Current LEA decoding is a bunch of special cases, properly decode the instruction, with exception of full SIB and RIP-relative modes. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.143250...@infradead.org --- tools/objtool/arch/x86/decode.c | 86 ++-- 1 file changed, 28 insertions(+), 58 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 549813c..d8f0138 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -91,9 +91,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, { struct insn insn; int x86_64, sign; - unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, - rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, - modrm_reg = 0, sib = 0; + unsigned char op1, op2, + rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, + modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, + sib = 0; struct stack_op *op = NULL; struct symbol *sym; @@ -328,68 +329,37 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8d: - if (sib == 0x24 && rex_w && !rex_b && !rex_x) { - - ADD_OP(op) { - if (!insn.displacement.value) { - /* lea (%rsp), reg */ - op->src.type = OP_SRC_REG; - } else { - /* lea disp(%rsp), reg */ - op->src.type = OP_SRC_ADD; - op->src.offset = insn.displacement.value; - } - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; - } - - } else if (rex == 0x48 && modrm == 0x65) { - - /* lea disp(%rbp), 
%rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_BP; - op->src.offset = insn.displacement.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + if (modrm_mod == 3) { + WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset); + break; + } - } else if (rex == 0x49 && modrm == 0x62 && - insn.displacement.value == -8) { + /* skip non 64bit ops */ + if (!rex_w) + break; - /* -* lea -0x8(%r10), %rsp -* -* Restoring rsp back to its original value after a -* stack realignment. -*/ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R10; - op->src.offset = -8; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; - } + /* skip nontrivial SIB */ + if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) + break; - } else if (rex == 0x49 && modrm == 0x65 && - insn.displacement.value == -16) { + /* skip RIP relative displacement */ + if (modrm_rm == 5 && modrm_mod == 0) + break; - /* -* lea -0x10(%r13), %rsp -* -* Restoring rsp back to its original value after a -* stack realignment. -*/ - ADD_OP(op) { + /* lea disp(%src), %dst */ + ADD_OP(op) { + op->src.offset = insn.displacement.value; + if (!op->src.offset) { + /* lea
[tip: objtool/core] objtool,x86: More ModRM sugar
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 7e1b2eb05787d1c7f18445b7cfdfc612e827ca7b Gitweb: https://git.kernel.org/tip/7e1b2eb05787d1c7f18445b7cfdfc612e827ca7b Author:Peter Zijlstra AuthorDate:Fri, 12 Feb 2021 09:13:00 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:31 +01:00 objtool,x86: More ModRM sugar Better helpers to decode ModRM. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/YCZB/ljatfxqq...@hirez.programming.kicks-ass.net --- tools/objtool/arch/x86/decode.c | 28 +--- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index b42e5ec..431bafb 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -82,15 +82,21 @@ unsigned long arch_jump_destination(struct instruction *insn) * 01 | [r/m + d8]|[S+d]| [r/m + d8] | * 10 | [r/m + d32] |[S+D]| [r/m + d32] | * 11 | r/ m | - * */ + +#define mod_is_mem() (modrm_mod != 3) +#define mod_is_reg() (modrm_mod == 3) + #define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) -#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3) +#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem()) #define rm_is(reg) (have_SIB() ? 
\ sib_base == (reg) && sib_index == CFI_SP : \ modrm_rm == (reg)) +#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) +#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg)) + int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -154,7 +160,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, case 0x1: case 0x29: - if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { + if (rex_w && rm_is_reg(CFI_SP)) { /* add/sub reg, %rsp */ ADD_OP(op) { @@ -219,7 +225,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; /* %rsp target only */ - if (!(modrm_mod == 3 && modrm_rm == CFI_SP)) + if (!rm_is_reg(CFI_SP)) break; imm = insn.immediate.value; @@ -272,7 +278,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (modrm_reg == CFI_SP) { - if (modrm_mod == 3) { + if (mod_is_reg()) { /* mov %rsp, reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; @@ -308,7 +314,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod == 3 && modrm_rm == CFI_SP) { + if (rm_is_reg(CFI_SP)) { /* mov reg, %rsp */ ADD_OP(op) { @@ -325,7 +331,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { + if (rm_is_mem(CFI_BP)) { /* mov reg, disp(%rbp) */ ADD_OP(op) { @@ -338,7 +344,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod != 3 && rm_is(CFI_SP)) { + if (rm_is_mem(CFI_SP)) { /* mov reg, disp(%rsp) */ ADD_OP(op) { @@ -357,7 +363,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (!rex_w) break; - if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) { + if (rm_is_mem(CFI_BP)) { /* mov disp(%rbp), reg */ ADD_OP(op) { @@ 
-370,7 +376,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } - if (modrm_mod != 3 && rm_is(CFI_SP)) { + if (rm_is_mem(CFI_SP)) { /* mov disp(%rsp), reg */ ADD_OP(op) { @@ -386,7 +392,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x8d: - if (modrm_mod == 3) { + if (mod_is_reg()) { WARN("invalid LEA
[tip: objtool/core] objtool: Add --backup
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 327695eb9e3461b09d5cd5baef5df6526dd240c6 Gitweb: https://git.kernel.org/tip/327695eb9e3461b09d5cd5baef5df6526dd240c6 Author:Peter Zijlstra AuthorDate:Fri, 26 Feb 2021 10:59:59 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:31 +01:00 objtool: Add --backup Teach objtool to write backup files, such that it becomes easier to see what objtool did to the object file. Backup files will be ${name}.orig. Suggested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Borislav Petkov Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/yd4obt3aoxpwl...@hirez.programming.kicks-ass.net --- tools/objtool/builtin-check.c | 4 +- tools/objtool/include/objtool/builtin.h | 3 +- tools/objtool/objtool.c | 64 - 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c3a85d8..97f063d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -18,7 +18,8 @@ #include #include -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, + validate_dup, vmlinux, mcount, noinstr, backup; static const char * const check_usage[] = { "objtool check [] file.o", @@ -37,6 +38,7 @@ const struct option check_options[] = { OPT_BOOLEAN('n', "noinstr", , "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", , "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", , "generate __mcount_loc"), + OPT_BOOLEAN('B', "backup", , "create .orig files before modification"), OPT_END(), }; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 2502bb2..d019210 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -8,7 +8,8 @@ #include extern const struct option 
check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, +validate_dup, vmlinux, mcount, noinstr, backup; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 7b97ce4..43c1836 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -44,6 +45,64 @@ bool help; const char *objname; static struct objtool_file file; +static bool objtool_create_backup(const char *_objname) +{ + int len = strlen(_objname); + char *buf, *base, *name = malloc(len+6); + int s, d, l, t; + + if (!name) { + perror("failed backup name malloc"); + return false; + } + + strcpy(name, _objname); + strcpy(name + len, ".orig"); + + d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644); + if (d < 0) { + perror("failed to create backup file"); + return false; + } + + s = open(_objname, O_RDONLY); + if (s < 0) { + perror("failed to open orig file"); + return false; + } + + buf = malloc(4096); + if (!buf) { + perror("failed backup data malloc"); + return false; + } + + while ((l = read(s, buf, 4096)) > 0) { + base = buf; + do { + t = write(d, base, l); + if (t < 0) { + perror("failed backup write"); + return false; + } + base += t; + l -= t; + } while (l); + } + + if (l < 0) { + perror("failed backup read"); + return false; + } + + free(name); + free(buf); + close(d); + close(s); + + return true; +} + struct objtool_file *objtool_open_read(const char *_objname) { if (objname) { @@ -59,6 +118,11 @@ struct objtool_file *objtool_open_read(const char *_objname) if (!file.elf) return NULL; + if (backup && !objtool_create_backup(objname)) { + WARN("can't create backup file"); + return NULL; + } + INIT_LIST_HEAD(_list); hash_init(file.insn_hash); 
INIT_LIST_HEAD(_call_list);
[tip: objtool/core] objtool,x86: Renumber CFI_reg
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 5e506daa2d148f735f90b2018ca6ef6e52144fad Gitweb: https://git.kernel.org/tip/5e506daa2d148f735f90b2018ca6ef6e52144fad Author:Peter Zijlstra AuthorDate:Tue, 09 Feb 2021 20:18:21 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:29 +01:00 objtool,x86: Renumber CFI_reg Make them match the instruction encoding numbering. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.033720...@infradead.org --- tools/objtool/arch/x86/include/arch/cfi_regs.h | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h index 79bc517..0579d22 100644 --- a/tools/objtool/arch/x86/include/arch/cfi_regs.h +++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h @@ -4,13 +4,13 @@ #define _OBJTOOL_CFI_REGS_H #define CFI_AX 0 -#define CFI_DX 1 -#define CFI_CX 2 +#define CFI_CX 1 +#define CFI_DX 2 #define CFI_BX 3 -#define CFI_SI 4 -#define CFI_DI 5 -#define CFI_BP 6 -#define CFI_SP 7 +#define CFI_SP 4 +#define CFI_BP 5 +#define CFI_SI 6 +#define CFI_DI 7 #define CFI_R8 8 #define CFI_R9 9 #define CFI_R1010
[tip: objtool/core] objtool,x86: Rewrite LEAVE
The following commit has been merged into the objtool/core branch of tip: Commit-ID: a91451516348221f2477205eca9e813830e01fa3 Gitweb: https://git.kernel.org/tip/a91451516348221f2477205eca9e813830e01fa3 Author:Peter Zijlstra AuthorDate:Tue, 09 Feb 2021 21:41:13 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:30 +01:00 objtool,x86: Rewrite LEAVE Since we can now have multiple stack-ops per instruction, we don't need to special case LEAVE and can simply emit the composite operations. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.253273...@infradead.org --- tools/objtool/arch/x86/decode.c | 14 +++--- tools/objtool/check.c| 24 ++-- tools/objtool/include/objtool/arch.h | 1 - 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index d8f0138..47b9acf 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -446,9 +446,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, * mov bp, sp * pop bp */ - ADD_OP(op) - op->dest.type = OP_DEST_LEAVE; - + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_BP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + ADD_OP(op) { + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_BP; + } break; case 0xe3: diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 12b8f0f..a0f762a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2020,7 +2020,7 @@ static int update_cfi_state(struct instruction *insn, } else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && -cfa->base == CFI_BP) { +(cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) { /* * mov %rbp, %rsp @@ -2217,7 +2217,7 @@ static int update_cfi_state(struct instruction *insn, cfa->offset = 0; cfi->drap_offset = -1; - } else if (regs[op->dest.reg].offset == 
-cfi->stack_size) { + } else if (cfi->stack_size == -regs[op->dest.reg].offset) { /* pop %reg */ restore_reg(cfi, op->dest.reg); @@ -2358,26 +2358,6 @@ static int update_cfi_state(struct instruction *insn, break; - case OP_DEST_LEAVE: - if ((!cfi->drap && cfa->base != CFI_BP) || - (cfi->drap && cfa->base != cfi->drap_reg)) { - WARN_FUNC("leave instruction with modified stack frame", - insn->sec, insn->offset); - return -1; - } - - /* leave (mov %rbp, %rsp; pop %rbp) */ - - cfi->stack_size = -cfi->regs[CFI_BP].offset - 8; - restore_reg(cfi, CFI_BP); - - if (!cfi->drap) { - cfa->base = CFI_SP; - cfa->offset -= 8; - } - - break; - case OP_DEST_MEM: if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) { WARN_FUNC("unknown stack-related memory operation", diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 6ff0685..ff21f38 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -35,7 +35,6 @@ enum op_dest_type { OP_DEST_MEM, OP_DEST_PUSH, OP_DEST_PUSHF, - OP_DEST_LEAVE, }; struct op_dest {
[tip: objtool/core] objtool,x86: Simplify register decode
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 9d5a2c2caa10fc135d7020e76baa6a17c52e608f Gitweb: https://git.kernel.org/tip/9d5a2c2caa10fc135d7020e76baa6a17c52e608f Author:Peter Zijlstra AuthorDate:Tue, 09 Feb 2021 19:59:43 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:30 +01:00 objtool,x86: Simplify register decode Since the CFI_reg number now matches the instruction encoding order do away with the op_to_cfi_reg[] and use direct assignment. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.362004...@infradead.org --- tools/objtool/arch/x86/decode.c | 79 +++- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 47b9acf..5ce7dc4 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -17,17 +17,6 @@ #include #include -static unsigned char op_to_cfi_reg[][2] = { - {CFI_AX, CFI_R8}, - {CFI_CX, CFI_R9}, - {CFI_DX, CFI_R10}, - {CFI_BX, CFI_R11}, - {CFI_SP, CFI_R12}, - {CFI_BP, CFI_R13}, - {CFI_SI, CFI_R14}, - {CFI_DI, CFI_R15}, -}; - static int is_x86_64(const struct elf *elf) { switch (elf->ehdr.e_machine) { @@ -94,7 +83,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, - sib = 0; + sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */; struct stack_op *op = NULL; struct symbol *sym; @@ -130,23 +119,29 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (insn.modrm.nbytes) { modrm = insn.modrm.bytes[0]; modrm_mod = X86_MODRM_MOD(modrm); - modrm_reg = X86_MODRM_REG(modrm); - modrm_rm = X86_MODRM_RM(modrm); + modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r; + modrm_rm = X86_MODRM_RM(modrm) + 8*rex_b; } - if 
(insn.sib.nbytes) + if (insn.sib.nbytes) { sib = insn.sib.bytes[0]; + /* + sib_scale = X86_SIB_SCALE(sib); + sib_index = X86_SIB_INDEX(sib) + 8*rex_x; + sib_base = X86_SIB_BASE(sib) + 8*rex_b; +*/ + } switch (op1) { case 0x1: case 0x29: - if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { + if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) { /* add/sub reg, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_ADD; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->src.reg = modrm_reg; op->dest.type = OP_DEST_REG; op->dest.reg = CFI_SP; } @@ -158,7 +153,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, /* push reg */ ADD_OP(op) { op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->src.reg = (op1 & 0x7) + 8*rex_b; op->dest.type = OP_DEST_PUSH; } @@ -170,7 +165,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, ADD_OP(op) { op->src.type = OP_SRC_POP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->dest.reg = (op1 & 0x7) + 8*rex_b; } break; @@ -223,7 +218,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && !rex_r && modrm_reg == 4) { + if (rex_w && modrm_reg == CFI_SP) { if (modrm_mod == 3) { /* mov %rsp, reg */ @@ -231,17 +226,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->src.type = OP_SRC_REG; op->src.reg = CFI_SP; op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.reg = modrm_rm; } break;
[tip: objtool/core] objtool,x86: Rewrite ADD/SUB/AND
The following commit has been merged into the objtool/core branch of tip: Commit-ID: e1bba6c8930b56c4afe88aa875f3d20d1cef4fe1 Gitweb: https://git.kernel.org/tip/e1bba6c8930b56c4afe88aa875f3d20d1cef4fe1 Author:Peter Zijlstra AuthorDate:Wed, 10 Feb 2021 14:11:30 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:31 +01:00 objtool,x86: Rewrite ADD/SUB/AND Support sign extending and imm8 forms. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.588366...@infradead.org --- tools/objtool/arch/x86/decode.c | 70 +++- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 78ae5be..b42e5ec 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -98,13 +98,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, struct list_head *ops_list) { struct insn insn; - int x86_64, sign; + int x86_64; unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; struct stack_op *op = NULL; struct symbol *sym; + u64 imm; x86_64 = is_x86_64(elf); if (x86_64 == -1) @@ -200,12 +201,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, *type = INSN_JUMP_CONDITIONAL; break; - case 0x81: - case 0x83: - if (rex != 0x48) + case 0x80 ... 
0x83: + /* +* 1000 00sw : mod OP r/m : immediate +* +* s - sign extend immediate +* w - imm8 / imm32 +* +* OP: 000 ADD 100 AND +* 001 OR 101 SUB +* 010 ADC 110 XOR +* 011 SBB 111 CMP +*/ + + /* 64bit only */ + if (!rex_w) break; - if (modrm == 0xe4) { + /* %rsp target only */ + if (!(modrm_mod == 3 && modrm_rm == CFI_SP)) + break; + + imm = insn.immediate.value; + if (op1 & 2) { /* sign extend */ + if (op1 & 1) { /* imm32 */ + imm <<= 32; + imm = (s64)imm >> 32; + } else { /* imm8 */ + imm <<= 56; + imm = (s64)imm >> 56; + } + } + + switch (modrm_reg & 7) { + case 5: + imm = -imm; + /* fallthrough */ + case 0: + /* add/sub imm, %rsp */ + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = imm; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + break; + + case 4: /* and imm, %rsp */ ADD_OP(op) { op->src.type = OP_SRC_AND; @@ -215,23 +258,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_SP; } break; - } - if (modrm == 0xc4) - sign = 1; - else if (modrm == 0xec) - sign = -1; - else + default: + /* WARN ? */ break; - - /* add/sub imm, %rsp */ - ADD_OP(op) { - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_SP; - op->src.offset = insn.immediate.value * sign; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; } + break; case 0x89:
[tip: objtool/core] objtool: Collate parse_options() users
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 13d52bf07c55645f9e3c430748708253d724e705 Gitweb: https://git.kernel.org/tip/13d52bf07c55645f9e3c430748708253d724e705 Author:Peter Zijlstra AuthorDate:Fri, 26 Feb 2021 11:18:24 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:32 +01:00 objtool: Collate parse_options() users Ensure there's a single place that parses check_options, in preparation for extending where to get options from. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210226110004.193108...@infradead.org --- tools/objtool/builtin-check.c | 14 +- tools/objtool/builtin-orc.c | 5 + tools/objtool/include/objtool/builtin.h | 2 ++ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 97f063d..0399752 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -42,17 +42,21 @@ const struct option check_options[] = { OPT_END(), }; +int cmd_parse_options(int argc, const char **argv, const char * const usage[]) +{ + argc = parse_options(argc, argv, check_options, usage, 0); + if (argc != 1) + usage_with_options(usage, check_options); + return argc; +} + int cmd_check(int argc, const char **argv) { const char *objname; struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, check_usage, 0); - - if (argc != 1) - usage_with_options(check_usage, check_options); - + argc = cmd_parse_options(argc, argv, check_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 8273bbf..17f8b93 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv) struct objtool_file *file; int ret; - argc = parse_options(argc, argv, check_options, orc_usage, 0); - if (argc != 1) - 
usage_with_options(orc_usage, check_options); - + argc = cmd_parse_options(argc, argv, orc_usage); objname = argv[0]; file = objtool_open_read(objname); diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index d019210..15ac0b7 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -11,6 +11,8 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr, backup; +extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); + extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv);
[tip: objtool/core] objtool,x86: Support %riz encodings
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 0a8bef63e5bf4496251f7bac4ddadb5f5f489932 Gitweb: https://git.kernel.org/tip/0a8bef63e5bf4496251f7bac4ddadb5f5f489932 Author:Peter Zijlstra AuthorDate:Wed, 10 Feb 2021 11:47:35 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:30 +01:00 objtool,x86: Support %riz encodings When there's a SIB byte, the register otherwise denoted by r/m will then be denoted by SIB.base REX.b will now extend this. SIB.index == SP is magic and notes an index value zero. This means that there's a bunch of alternative (longer) encodings for the same thing. Eg. 'ModRM.mod != 3, ModRM.r/m = AX' can be encoded as 'ModRM.mod != 3, ModRM.r/m = SP, SIB.base = AX, SIB.index = SP' which is actually 4 different encodings because the value of SIB.scale is irrelevant, giving rise to 5 different but equal encodings. Support these encodings and clean up the SIB handling in general. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173627.472967...@infradead.org --- tools/objtool/arch/x86/decode.c | 67 ++-- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 5ce7dc4..78ae5be 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -72,6 +72,25 @@ unsigned long arch_jump_destination(struct instruction *insn) return -1; \ else for (list_add_tail(>list, ops_list); op; op = NULL) +/* + * Helpers to decode ModRM/SIB: + * + * r/m| AX CX DX BX | SP | BP | SI DI | + *| R8 R9 R10 R11 | R12 | R13 | R14 R15 | + * Mod++-+-+-+ + * 00 |[r/m] |[SIB]|[IP+]| [r/m] | + * 01 | [r/m + d8]|[S+d]| [r/m + d8] | + * 10 | [r/m + d32] |[S+D]| [r/m + d32] | + * 11 | r/ m | + * + */ +#define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) +#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3) + +#define rm_is(reg) 
(have_SIB() ? \ + sib_base == (reg) && sib_index == CFI_SP : \ + modrm_rm == (reg)) + int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -83,7 +102,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, - sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */; + sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; struct stack_op *op = NULL; struct symbol *sym; @@ -125,11 +144,9 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, if (insn.sib.nbytes) { sib = insn.sib.bytes[0]; - /* - sib_scale = X86_SIB_SCALE(sib); + /* sib_scale = X86_SIB_SCALE(sib); */ sib_index = X86_SIB_INDEX(sib) + 8*rex_x; sib_base = X86_SIB_BASE(sib) + 8*rex_b; -*/ } switch (op1) { @@ -218,7 +235,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && modrm_reg == CFI_SP) { + if (!rex_w) + break; + + if (modrm_reg == CFI_SP) { if (modrm_mod == 3) { /* mov %rsp, reg */ @@ -231,14 +251,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; } else { - /* skip nontrivial SIB */ - if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x)) - break; - /* skip RIP relative displacement */ - if ((modrm_rm & 7) == 5 && modrm_mod == 0) + if (is_RIP()) break; + /* skip nontrivial SIB */ + if (have_SIB()) { + modrm_rm = sib_base; + if (sib_index != CFI_SP) + break; + } + /* mov %rsp, disp(%reg) */ ADD_OP(op) {
[tip: objtool/core] objtool: Parse options from OBJTOOL_ARGS
The following commit has been merged into the objtool/core branch of tip: Commit-ID: b52eb21aeca75790869c26b91b1d7b80b3946430 Gitweb: https://git.kernel.org/tip/b52eb21aeca75790869c26b91b1d7b80b3946430 Author:Peter Zijlstra AuthorDate:Fri, 26 Feb 2021 11:32:30 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:32 +01:00 objtool: Parse options from OBJTOOL_ARGS Teach objtool to parse options from the OBJTOOL_ARGS environment variable. This enables things like: $ OBJTOOL_ARGS="--backup" make O=defconfig-build/ kernel/ponies.o to obtain both defconfig-build/kernel/ponies.o{,.orig} and easily inspect what objtool actually did. Suggested-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210226110004.252553...@infradead.org --- tools/objtool/builtin-check.c | 25 + 1 file changed, 25 insertions(+) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 0399752..8b38b5d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -26,6 +27,11 @@ static const char * const check_usage[] = { NULL, }; +static const char * const env_usage[] = { + "OBJTOOL_ARGS=\"\"", + NULL, +}; + const struct option check_options[] = { OPT_BOOLEAN('f', "no-fp", _fp, "Skip frame pointer validation"), OPT_BOOLEAN('u', "no-unreachable", _unreachable, "Skip 'unreachable instruction' warnings"), @@ -44,6 +50,25 @@ const struct option check_options[] = { int cmd_parse_options(int argc, const char **argv, const char * const usage[]) { + const char *envv[16] = { }; + char *env; + int envc; + + env = getenv("OBJTOOL_ARGS"); + if (env) { + envv[0] = "OBJTOOL_ARGS"; + for (envc = 1; envc < ARRAY_SIZE(envv); ) { + envv[envc++] = env; + env = strchr(env, ' '); + if (!env) + break; + *env = '\0'; + env++; + } + + parse_options(envc, envv, check_options, env_usage, 0); + } + argc = parse_options(argc, argv, 
check_options, usage, 0); if (argc != 1) usage_with_options(usage, check_options);
[tip: objtool/core] objtool: Allow UNWIND_HINT to suppress dodgy stack modifications
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 8c0cca513be9e3dd9c17b55b72b66751f3487577 Gitweb: https://git.kernel.org/tip/8c0cca513be9e3dd9c17b55b72b66751f3487577 Author: Peter Zijlstra AuthorDate: Thu, 11 Feb 2021 13:03:28 +01:00 Committer: Peter Zijlstra CommitterDate: Wed, 03 Mar 2021 09:38:29 +01:00 objtool: Allow UNWIND_HINT to suppress dodgy stack modifications rewind_stack_do_exit() UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS call do_exit Does unspeakable things to the stack, which objtool currently fails to detect due to a limitation in instruction decoding. This will be rectified after which the above will result in: arch/x86/entry/entry_64.o: warning: objtool: .text+0xab: unsupported stack register modification Allow the UNWIND_HINT on the next instruction to suppress this, it will overwrite the state anyway. 
Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Tested-by: Nick Desaulniers Link: https://lkml.kernel.org/r/20210211173626.918498...@infradead.org --- tools/objtool/check.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 068cdb4..12b8f0f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1959,8 +1959,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg) * 41 5d pop%r13 * c3retq */ -static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, -struct stack_op *op) +static int update_cfi_state(struct instruction *insn, + struct instruction *next_insn, + struct cfi_state *cfi, struct stack_op *op) { struct cfi_reg *cfa = >cfa; struct cfi_reg *regs = cfi->regs; @@ -2161,7 +2162,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; } - if (op->dest.reg == cfi->cfa.base) { + if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) { WARN_FUNC("unsupported stack register modification", insn->sec, insn->offset); return -1; @@ -2433,13 +2434,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn return 0; } -static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +static int handle_insn_ops(struct instruction *insn, + struct instruction *next_insn, + struct insn_state *state) { struct stack_op *op; list_for_each_entry(op, >stack_ops, list) { - if (update_cfi_state(insn, >cfi, op)) + if (update_cfi_state(insn, next_insn, >cfi, op)) return 1; if (op->dest.type == OP_DEST_PUSHF) { @@ -2719,7 +2722,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; } - if (handle_insn_ops(insn, )) + if (handle_insn_ops(insn, next_insn, )) return 1; switch (insn->type) {
[tip: sched/urgent] sched: Simplify migration_cpu_stop()
The following commit has been merged into the sched/urgent branch of tip: Commit-ID: 6430eb536a97036b1d529cbf383cfe36e41a2f97 Gitweb: https://git.kernel.org/tip/6430eb536a97036b1d529cbf383cfe36e41a2f97 Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:50:39 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 01 Mar 2021 11:02:13 +01:00 sched: Simplify migration_cpu_stop() When affine_move_task() issues a migration_cpu_stop(), the purpose of that function is to complete that @pending, not any random other p->migration_pending that might have gotten installed since. This realization much simplifies migration_cpu_stop() and allows further necessary steps to fix all this as it provides the guarantee that @pending's stopper will complete @pending (and not some random other @pending). Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.430014...@infradead.org --- kernel/sched/core.c | 56 ++-- 1 file changed, 8 insertions(+), 48 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 79ddba5..088e8f4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, */ static int migration_cpu_stop(void *data) { - struct set_affinity_pending *pending; struct migration_arg *arg = data; + struct set_affinity_pending *pending = arg->pending; struct task_struct *p = arg->task; int dest_cpu = arg->dest_cpu; struct rq *rq = this_rq(); @@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data) raw_spin_lock(>pi_lock); rq_lock(rq, ); - pending = p->migration_pending; - if (pending && !arg->pending) { - /* -* This happens from sched_exec() and migrate_task_to(), -* neither of them care about pending and just want a task to -* maybe move about. 
-* -* Even if there is a pending, we can ignore it, since -* affine_move_task() will have it's own stop_work's in flight -* which will manage the completion. -* -* Notably, pending doesn't need to match arg->pending. This can -* happen when tripple concurrent affine_move_task() first sets -* pending, then clears pending and eventually sets another -* pending. -*/ - pending = NULL; - } - /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data) goto out; if (pending) { - p->migration_pending = NULL; + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true; } - /* migrate_enable() -- we must not race against SCA */ - if (dest_cpu < 0) { - /* -* When this was migrate_enable() but we no longer -* have a @pending, a concurrent SCA 'fixed' things -* and we should be valid again. Nothing to do. -*/ - if (!pending) { - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), >cpus_mask)); - goto out; - } - + if (dest_cpu < 0) dest_cpu = cpumask_any_distribute(>cpus_mask); - } if (task_on_rq_queued(p)) rq = __migrate_task(rq, , p, dest_cpu); else p->wake_cpu = dest_cpu; - } else if (dest_cpu < 0 || pending) { + } else if (pending) { /* * This happens when we get migrated between migrate_enable()'s * preempt_enable() and scheduling the stopper task. At that @@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data) * ->pi_lock, so the allowed mask is stable - if it got * somewhere allowed, we're done. */ - if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { - p->migration_pending = NULL; + if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true; goto out;
[tip: sched/urgent] sched: Fix migration_cpu_stop() requeueing
The following commit has been merged into the sched/urgent branch of tip: Commit-ID: b8e45e2a14bab684713f5dfc70c9e578c333dcdd Gitweb: https://git.kernel.org/tip/b8e45e2a14bab684713f5dfc70c9e578c333dcdd Author: Peter Zijlstra AuthorDate: Sat, 13 Feb 2021 13:10:35 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 01 Mar 2021 11:02:13 +01:00 sched: Fix migration_cpu_stop() requeueing When affine_move_task(p) is called on a running task @p, which is not otherwise already changing affinity, we'll first set p->migration_pending and then do: stop_one_cpu(cpu_of_rq(rq), migration_cpu_stop, &arg); This then gets us to migration_cpu_stop() running on the CPU that was previously running our victim task @p. If we find that our task is no longer on that runqueue (this can happen because of a concurrent migration due to load-balance etc.), then we'll end up at the: } else if (dest_cpu < 0 || pending) { branch. Which we'll take because we set pending earlier. Here we first check if the task @p has already satisfied the affinity constraints, if so we bail early [A]. Otherwise we'll reissue migration_cpu_stop() onto the CPU that is now hosting our task @p: stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, &pending->arg, &pending->stop_work); Except, we've never initialized pending->arg, which will be all 0s. This then results in running migration_cpu_stop() on the next CPU with arg->p == NULL, which gives the by now obvious result of fireworks. The cure is to change affine_move_task() to always use pending->arg, furthermore we can use the exact same pattern as the SCA_MIGRATE_ENABLE case, since we'll block on the pending->done completion anyway, no point in adding yet another completion in stop_one_cpu(). This then gives a clear distinction between the two migration_cpu_stop() use cases: - sched_exec() / migrate_task_to() : arg->pending == NULL - affine_move_task() : arg->pending != NULL; And we can have it ignore p->migration_pending when !arg->pending. 
Any stop work from sched_exec() / migrate_task_to() is in addition to stop works from affine_move_task(), which will be sufficient to issue the completion. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.357743...@infradead.org --- kernel/sched/core.c | 39 --- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ca2bb62..79ddba5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1922,6 +1922,24 @@ static int migration_cpu_stop(void *data) rq_lock(rq, ); pending = p->migration_pending; + if (pending && !arg->pending) { + /* +* This happens from sched_exec() and migrate_task_to(), +* neither of them care about pending and just want a task to +* maybe move about. +* +* Even if there is a pending, we can ignore it, since +* affine_move_task() will have it's own stop_work's in flight +* which will manage the completion. +* +* Notably, pending doesn't need to match arg->pending. This can +* happen when tripple concurrent affine_move_task() first sets +* pending, then clears pending and eventually sets another +* pending. +*/ + pending = NULL; + } + /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -2194,10 +2212,6 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag int dest_cpu, unsigned int flags) { struct set_affinity_pending my_pending = { }, *pending = NULL; - struct migration_arg arg = { - .task = p, - .dest_cpu = dest_cpu, - }; bool complete = false; /* Can the task run on the task's current CPU? 
If so, we're done */ @@ -2235,6 +2249,12 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag /* Install the request */ refcount_set(_pending.refs, 1); init_completion(_pending.done); + my_pending.arg = (struct migration_arg) { + .task = p, + .dest_cpu = -1, /* any */ + .pending = _pending, + }; + p->migration_pending = _pending; } else { pending = p->migration_pending; @@ -2265,12 +2285,6 @@ static int
[tip: sched/urgent] sched: Collate affine_move_task() stoppers
The following commit has been merged into the sched/urgent branch of tip: Commit-ID: dbf983c0a5c37da2d476564792bd84e0e8f067fc Gitweb: https://git.kernel.org/tip/dbf983c0a5c37da2d476564792bd84e0e8f067fc Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:15:23 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 01 Mar 2021 11:02:14 +01:00 sched: Collate affine_move_task() stoppers The SCA_MIGRATE_ENABLE and task_running() cases are almost identical, collapse them to avoid further duplication. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.500108...@infradead.org --- kernel/sched/core.c | 23 --- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 088e8f4..84b657f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2239,30 +2239,23 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag return -EINVAL; } - if (flags & SCA_MIGRATE_ENABLE) { - - refcount_inc(>refs); /* pending->{arg,stop_work} */ - p->migration_flags &= ~MDF_PUSH; - task_rq_unlock(rq, p, rf); - - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - >arg, >stop_work); - - return 0; - } - if (task_running(rq, p) || p->state == TASK_WAKING) { /* -* Lessen races (and headaches) by delegating -* is_migration_disabled(p) checks to the stopper, which will -* run on the same CPU as said p. +* MIGRATE_ENABLE gets here because 'p == current', but for +* anything else we cannot do is_migration_disabled(), punt +* and have the stopper function handle it all race-free. 
*/ + refcount_inc(>refs); /* pending->{arg,stop_work} */ + if (flags & SCA_MIGRATE_ENABLE) + p->migration_flags &= ~MDF_PUSH; task_rq_unlock(rq, p, rf); stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, >arg, >stop_work); + if (flags & SCA_MIGRATE_ENABLE) + return 0; } else { if (!is_migration_disabled(p)) {
[tip: sched/urgent] sched: Optimize migration_cpu_stop()
The following commit has been merged into the sched/urgent branch of tip: Commit-ID: 9eca0f53b1c2f5acb85e84673e263bf996817a24 Gitweb: https://git.kernel.org/tip/9eca0f53b1c2f5acb85e84673e263bf996817a24 Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:21:35 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 01 Mar 2021 11:02:14 +01:00 sched: Optimize migration_cpu_stop() When the purpose of migration_cpu_stop() is to migrate the task to 'any' valid CPU, don't migrate the task when it's already running on a valid CPU. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.569238...@infradead.org --- kernel/sched/core.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 84b657f..ac05afb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1936,14 +1936,25 @@ static int migration_cpu_stop(void *data) complete = true; } - if (dest_cpu < 0) + if (dest_cpu < 0) { + if (cpumask_test_cpu(task_cpu(p), >cpus_mask)) + goto out; + dest_cpu = cpumask_any_distribute(>cpus_mask); + } if (task_on_rq_queued(p)) rq = __migrate_task(rq, , p, dest_cpu); else p->wake_cpu = dest_cpu; + /* +* XXX __migrate_task() can fail, at which point we might end +* up running on a dodgy CPU, AFAICT this can only happen +* during CPU hotplug, at which point we'll get pushed out +* anyway, so it's probably not a big deal. +*/ + } else if (pending) { /* * This happens when we get migrated between migrate_enable()'s
[tip: sched/urgent] sched: Fix affine_move_task() self-concurrency
The following commit has been merged into the sched/urgent branch of tip: Commit-ID: de8115ef5c83ef2c9941684019d59f4c2e5d16ce Gitweb: https://git.kernel.org/tip/de8115ef5c83ef2c9941684019d59f4c2e5d16ce Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:31:09 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 01 Mar 2021 11:02:14 +01:00 sched: Fix affine_move_task() self-concurrency Consider: sched_setaffinity(p, X); sched_setaffinity(p, Y); Then the first will install p->migration_pending = _pending; and issue stop_one_cpu_nowait(pending); and the second one will read p->migration_pending and _also_ issue: stop_one_cpu_nowait(pending), the _SAME_ @pending. This causes stopper list corruption. Add set_affinity_pending::stop_pending, to indicate if a stopper is in progress. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.649146...@infradead.org --- kernel/sched/core.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ac05afb..4e4d100 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1864,6 +1864,7 @@ struct migration_arg { struct set_affinity_pending { refcount_t refs; + unsigned intstop_pending; struct completion done; struct cpu_stop_workstop_work; struct migration_argarg; @@ -1982,12 +1983,15 @@ static int migration_cpu_stop(void *data) * determine is_migration_disabled() and so have to chase after * it. 
*/ + WARN_ON_ONCE(!pending->stop_pending); task_rq_unlock(rq, p, ); stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, >arg, >stop_work); return 0; } out: + if (pending) + pending->stop_pending = false; task_rq_unlock(rq, p, ); if (complete) @@ -2183,7 +2187,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag int dest_cpu, unsigned int flags) { struct set_affinity_pending my_pending = { }, *pending = NULL; - bool complete = false; + bool stop_pending, complete = false; /* Can the task run on the task's current CPU? If so, we're done */ if (cpumask_test_cpu(task_cpu(p), >cpus_mask)) { @@ -2256,14 +2260,19 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag * anything else we cannot do is_migration_disabled(), punt * and have the stopper function handle it all race-free. */ + stop_pending = pending->stop_pending; + if (!stop_pending) + pending->stop_pending = true; refcount_inc(>refs); /* pending->{arg,stop_work} */ if (flags & SCA_MIGRATE_ENABLE) p->migration_flags &= ~MDF_PUSH; task_rq_unlock(rq, p, rf); - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - >arg, >stop_work); + if (!stop_pending) { + stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, + >arg, >stop_work); + } if (flags & SCA_MIGRATE_ENABLE) return 0;
[tip: sched/urgent] sched: Simplify set_affinity_pending refcounts
The following commit has been merged into the sched/urgent branch of tip: Commit-ID: a4c2579076dc6951709a8e425df8369ab6eb2f24 Gitweb: https://git.kernel.org/tip/a4c2579076dc6951709a8e425df8369ab6eb2f24 Author:Peter Zijlstra AuthorDate:Wed, 24 Feb 2021 11:42:08 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 01 Mar 2021 11:02:15 +01:00 sched: Simplify set_affinity_pending refcounts Now that we have set_affinity_pending::stop_pending to indicate if a stopper is in progress, and we have the guarantee that if that stopper exists, it will (eventually) complete our @pending we can simplify the refcount scheme by no longer counting the stopper thread. Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()") Cc: sta...@kernel.org Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210224131355.724130...@infradead.org --- kernel/sched/core.c | 32 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4e4d100..9819121 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1862,6 +1862,10 @@ struct migration_arg { struct set_affinity_pending *pending; }; +/* + * @refs: number of wait_for_completion() + * @stop_pending: is @stop_work in use + */ struct set_affinity_pending { refcount_t refs; unsigned intstop_pending; @@ -1997,10 +2001,6 @@ out: if (complete) complete_all(>done); - /* For pending->{arg,stop_work} */ - if (pending && refcount_dec_and_test(>refs)) - wake_up_var(>refs); - return 0; } @@ -2199,12 +2199,16 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag push_task = get_task_struct(p); } + /* +* If there are pending waiters, but no pending stop_work, +* then complete now. 
+*/ pending = p->migration_pending; - if (pending) { - refcount_inc(>refs); + if (pending && !pending->stop_pending) { p->migration_pending = NULL; complete = true; } + task_rq_unlock(rq, p, rf); if (push_task) { @@ -2213,7 +2217,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag } if (complete) - goto do_complete; + complete_all(>done); return 0; } @@ -2264,9 +2268,9 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag if (!stop_pending) pending->stop_pending = true; - refcount_inc(>refs); /* pending->{arg,stop_work} */ if (flags & SCA_MIGRATE_ENABLE) p->migration_flags &= ~MDF_PUSH; + task_rq_unlock(rq, p, rf); if (!stop_pending) { @@ -2282,12 +2286,13 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag if (task_on_rq_queued(p)) rq = move_queued_task(rq, rf, p, dest_cpu); - p->migration_pending = NULL; - complete = true; + if (!pending->stop_pending) { + p->migration_pending = NULL; + complete = true; + } } task_rq_unlock(rq, p, rf); -do_complete: if (complete) complete_all(>done); } @@ -2295,7 +2300,7 @@ do_complete: wait_for_completion(>done); if (refcount_dec_and_test(>refs)) - wake_up_var(>refs); + wake_up_var(>refs); /* No UaF, just an address */ /* * Block the original owner of until all subsequent callers @@ -2303,6 +2308,9 @@ do_complete: */ wait_var_event(_pending.refs, !refcount_read(_pending.refs)); + /* ARGH */ + WARN_ON_ONCE(my_pending.stop_pending); + return 0; }
[tip: locking/urgent] static_call: Fix the module key fixup
The following commit has been merged into the locking/urgent branch of tip: Commit-ID: 8b97c027dfe4ba195be08fd0e18f716005763b8a Gitweb: https://git.kernel.org/tip/8b97c027dfe4ba195be08fd0e18f716005763b8a Author: Peter Zijlstra AuthorDate: Thu, 25 Feb 2021 23:03:51 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 01 Mar 2021 11:02:10 +01:00 static_call: Fix the module key fixup Provided the target address of an R_X86_64_PC32 relocation is aligned, the low two bits should be invariant between the relative and absolute value. Turns out the address is not aligned and things go sideways; ensure we transfer the bits in the absolute form when fixing up the key address. Fixes: 73f44fe19d35 ("static_call: Allow module use without exposing static_call_key") Reported-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steven Rostedt (VMware) Link: https://lkml.kernel.org/r/20210225220351.ge4...@worktop.programming.kicks-ass.net --- kernel/static_call.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/static_call.c b/kernel/static_call.c index 6906c6e..ae82529 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -349,7 +349,8 @@ static int static_call_add_module(struct module *mod) struct static_call_site *site; for (site = start; site != stop; site++) { - unsigned long addr = (unsigned long)static_call_key(site); + unsigned long s_key = (long)site->key + (long)&site->key; + unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; unsigned long key; /* @@ -373,8 +374,8 @@ static int static_call_add_module(struct module *mod) return -EINVAL; } - site->key = (key - (long)&site->key) | - (site->key & STATIC_CALL_SITE_FLAGS); + key |= s_key & STATIC_CALL_SITE_FLAGS; + site->key = key - (long)&site->key; } return __static_call_init(mod, start, stop);
[tip: x86/entry] objtool: Fix stack-swizzle for FRAME_POINTER=y
The following commit has been merged into the x86/entry branch of tip: Commit-ID: 724c8a23d589d8a002d2e39633c2f9a5a429616f Gitweb: https://git.kernel.org/tip/724c8a23d589d8a002d2e39633c2f9a5a429616f Author:Peter Zijlstra AuthorDate:Thu, 18 Feb 2021 17:14:10 +01:00 Committer: Thomas Gleixner CommitterDate: Mon, 22 Feb 2021 19:54:09 +01:00 objtool: Fix stack-swizzle for FRAME_POINTER=y When objtool encounters the stack-swizzle: mov %rsp, (%[tos]) mov %[tos], %rsp ... pop %rsp Inside a FRAME_POINTER=y build, things go a little screwy because clearly we're not adjusting the cfa->base. This then results in the pop %rsp not being detected as a restore of cfa->base so it will turn into a regular POP and offset the stack, resulting in: kernel/softirq.o: warning: objtool: do_softirq()+0xdb: return with modified stack frame Therefore, have "mov %[tos], %rsp" act like a PUSH (it sorta is anyway) to balance the things out. We're not too concerned with the actual stack_size for frame-pointer builds, since we don't generate ORC data for them anyway. Fixes: aafeb14e9da2 ("objtool: Support stack-swizzle") Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/yc6uc+rc9kkmq...@hirez.programming.kicks-ass.net --- tools/objtool/check.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8e74210..2087974 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1983,6 +1983,20 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, } } + else if (op->dest.reg == CFI_SP && +cfi->vals[op->src.reg].base == CFI_SP_INDIRECT && +cfi->vals[op->src.reg].offset == cfa->offset) { + + /* +* The same stack swizzle case 2) as above. But +* because we can't change cfa->base, case 3) +* will become a regular POP. Pretend we're a +* PUSH so things don't go unbalanced. 
+*/ + cfi->stack_size += 8; + } + + break; case OP_SRC_ADD:
[tip: objtool/core] objtool: Fix stack-swizzle for FRAME_POINTER=y
The following commit has been merged into the objtool/core branch of tip: Commit-ID: 23e34c5988088b8bb4c55905973ca76114cb33ee Gitweb: https://git.kernel.org/tip/23e34c5988088b8bb4c55905973ca76114cb33ee Author:Peter Zijlstra AuthorDate:Thu, 18 Feb 2021 17:14:10 +01:00 Committer: Peter Zijlstra CommitterDate: Mon, 22 Feb 2021 12:05:18 +01:00 objtool: Fix stack-swizzle for FRAME_POINTER=y When objtool encounters the stack-swizzle: mov %rsp, (%[tos]) mov %[tos], %rsp ... pop %rsp Inside a FRAME_POINTER=y build, things go a little screwy because clearly we're not adjusting the cfa->base. This then results in the pop %rsp not being detected as a restore of cfa->base so it will turn into a regular POP and offset the stack, resulting in: kernel/softirq.o: warning: objtool: do_softirq()+0xdb: return with modified stack frame Therefore, have "mov %[tos], %rsp" act like a PUSH (it sorta is anyway) to balance the things out. We're not too concerned with the actual stack_size for frame-pointer builds, since we don't generate ORC data for them anyway. Fixes: aafeb14e9da2 ("objtool: Support stack-swizzle") Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/yc6uc+rc9kkmq...@hirez.programming.kicks-ass.net --- tools/objtool/check.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 62cd211..d7f1496 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1983,6 +1983,20 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, } } + else if (op->dest.reg == CFI_SP && +cfi->vals[op->src.reg].base == CFI_SP_INDIRECT && +cfi->vals[op->src.reg].offset == cfa->offset) { + + /* +* The same stack swizzle case 2) as above. But +* because we can't change cfa->base, case 3) +* will become a regular POP. Pretend we're a +* PUSH so things don't go unbalanced. +*/ + cfi->stack_size += 8; + } + + break; case OP_SRC_ADD:
[tip: sched/core] rbtree, perf: Use new rbtree helpers
The following commit has been merged into the sched/core branch of tip: Commit-ID: a3b89864554bbce1594b7abdb5739fc708c1ca95 Gitweb: https://git.kernel.org/tip/a3b89864554bbce1594b7abdb5739fc708c1ca95 Author:Peter Zijlstra AuthorDate:Wed, 29 Apr 2020 17:05:15 +02:00 Committer: Ingo Molnar CommitterDate: Wed, 17 Feb 2021 14:07:48 +01:00 rbtree, perf: Use new rbtree helpers Reduce rbtree boiler plate by using the new helpers. One noteworthy change is unification of the various (partial) compare functions. We construct a subtree match by forcing the sub-order to always match, see __group_cmp(). Due to 'const' we had to touch cgroup_id(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Tejun Heo Acked-by: Davidlohr Bueso --- include/linux/cgroup.h | 4 +- kernel/events/core.c | 195 ++-- 2 files changed, 92 insertions(+), 107 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 451c2d2..4f2f79d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -307,7 +307,7 @@ void css_task_iter_end(struct css_task_iter *it); * Inline functions. 
*/ -static inline u64 cgroup_id(struct cgroup *cgrp) +static inline u64 cgroup_id(const struct cgroup *cgrp) { return cgrp->kn->id; } @@ -701,7 +701,7 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); struct cgroup_subsys_state; struct cgroup; -static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; } +static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, diff --git a/kernel/events/core.c b/kernel/events/core.c index 55d1879..3d89096 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1595,50 +1595,91 @@ static void perf_event_groups_init(struct perf_event_groups *groups) groups->index = 0; } +static inline struct cgroup *event_cgroup(const struct perf_event *event) +{ + struct cgroup *cgroup = NULL; + +#ifdef CONFIG_CGROUP_PERF + if (event->cgrp) + cgroup = event->cgrp->css.cgroup; +#endif + + return cgroup; +} + /* * Compare function for event groups; * * Implements complex key that first sorts by CPU and then by virtual index * which provides ordering when rotating groups for the same CPU. */ -static bool -perf_event_groups_less(struct perf_event *left, struct perf_event *right) +static __always_inline int +perf_event_groups_cmp(const int left_cpu, const struct cgroup *left_cgroup, + const u64 left_group_index, const struct perf_event *right) { - if (left->cpu < right->cpu) - return true; - if (left->cpu > right->cpu) - return false; + if (left_cpu < right->cpu) + return -1; + if (left_cpu > right->cpu) + return 1; #ifdef CONFIG_CGROUP_PERF - if (left->cgrp != right->cgrp) { - if (!left->cgrp || !left->cgrp->css.cgroup) { - /* -* Left has no cgroup but right does, no cgroups come -* first. 
-*/ - return true; - } - if (!right->cgrp || !right->cgrp->css.cgroup) { - /* -* Right has no cgroup but left does, no cgroups come -* first. -*/ - return false; - } - /* Two dissimilar cgroups, order by id. */ - if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id) - return true; + { + const struct cgroup *right_cgroup = event_cgroup(right); - return false; + if (left_cgroup != right_cgroup) { + if (!left_cgroup) { + /* +* Left has no cgroup but right does, no +* cgroups come first. +*/ + return -1; + } + if (!right_cgroup) { + /* +* Right has no cgroup but left does, no +* cgroups come first. +*/ + return 1; + } + /* Two dissimilar cgroups, order by id. */ + if (cgroup_id(left_cgroup) < cgroup_id(right_cgroup)) + return -1; + + return 1; + } } #endif - if (left->group_index < right->group_index) -
[tip: sched/core] rbtree, timerqueue: Use rb_add_cached()
The following commit has been merged into the sched/core branch of tip: Commit-ID: 798172b1374e28ecf687d6662fc5fdaec5c65385 Gitweb: https://git.kernel.org/tip/798172b1374e28ecf687d6662fc5fdaec5c65385 Author: Peter Zijlstra AuthorDate: Wed, 29 Apr 2020 17:07:53 +02:00 Committer: Ingo Molnar CommitterDate: Wed, 17 Feb 2021 14:08:01 +01:00 rbtree, timerqueue: Use rb_add_cached() Reduce rbtree boilerplate by using the new helpers. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Davidlohr Bueso --- lib/timerqueue.c | 28 +--- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/lib/timerqueue.c b/lib/timerqueue.c index c527109..cdb9c76 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -14,6 +14,14 @@ #include #include +#define __node_2_tq(_n) \ + rb_entry((_n), struct timerqueue_node, node) + +static inline bool __timerqueue_less(struct rb_node *a, const struct rb_node *b) +{ + return __node_2_tq(a)->expires < __node_2_tq(b)->expires; +} + /** * timerqueue_add - Adds timer to timerqueue. * @@ -26,28 +34,10 @@ */ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) { - struct rb_node **p = &head->rb_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct timerqueue_node *ptr; - bool leftmost = true; - /* Make sure we don't add nodes that are already added */ WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node)); - while (*p) { - parent = *p; - ptr = rb_entry(parent, struct timerqueue_node, node); - if (node->expires < ptr->expires) { - p = &(*p)->rb_left; - } else { - p = &(*p)->rb_right; - leftmost = false; - } - } - rb_link_node(&node->node, parent, p); - rb_insert_color_cached(&node->node, &head->rb_root, leftmost); - - return leftmost; + return rb_add_cached(&node->node, &head->rb_root, __timerqueue_less); } EXPORT_SYMBOL_GPL(timerqueue_add);
[tip: sched/core] rbtree, uprobes: Use rbtree helpers
The following commit has been merged into the sched/core branch of tip: Commit-ID: a905e84e64083a0ee701f61810badee234050825 Gitweb: https://git.kernel.org/tip/a905e84e64083a0ee701f61810badee234050825 Author:Peter Zijlstra AuthorDate:Wed, 29 Apr 2020 17:06:27 +02:00 Committer: Ingo Molnar CommitterDate: Wed, 17 Feb 2021 14:07:52 +01:00 rbtree, uprobes: Use rbtree helpers Reduce rbtree boilerplate by using the new helpers. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Davidlohr Bueso --- kernel/events/uprobes.c | 80 +++- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index bf9edd8..fd5160d 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -613,41 +613,56 @@ static void put_uprobe(struct uprobe *uprobe) } } -static int match_uprobe(struct uprobe *l, struct uprobe *r) +static __always_inline +int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset, + const struct uprobe *r) { - if (l->inode < r->inode) + if (l_inode < r->inode) return -1; - if (l->inode > r->inode) + if (l_inode > r->inode) return 1; - if (l->offset < r->offset) + if (l_offset < r->offset) return -1; - if (l->offset > r->offset) + if (l_offset > r->offset) return 1; return 0; } +#define __node_2_uprobe(node) \ + rb_entry((node), struct uprobe, rb_node) + +struct __uprobe_key { + struct inode *inode; + loff_t offset; +}; + +static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b) +{ + const struct __uprobe_key *a = key; + return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b)); +} + +static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b) +{ + struct uprobe *u = __node_2_uprobe(a); + return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b)); +} + static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) { - struct uprobe u = { .inode = inode, .offset = offset }; - struct rb_node *n = uprobes_tree.rb_node; - struct uprobe 
*uprobe; - int match; + struct __uprobe_key key = { + .inode = inode, + .offset = offset, + }; + struct rb_node *node = rb_find(, _tree, __uprobe_cmp_key); - while (n) { - uprobe = rb_entry(n, struct uprobe, rb_node); - match = match_uprobe(, uprobe); - if (!match) - return get_uprobe(uprobe); + if (node) + return __node_2_uprobe(node); - if (match < 0) - n = n->rb_left; - else - n = n->rb_right; - } return NULL; } @@ -668,32 +683,15 @@ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) static struct uprobe *__insert_uprobe(struct uprobe *uprobe) { - struct rb_node **p = _tree.rb_node; - struct rb_node *parent = NULL; - struct uprobe *u; - int match; + struct rb_node *node; - while (*p) { - parent = *p; - u = rb_entry(parent, struct uprobe, rb_node); - match = match_uprobe(uprobe, u); - if (!match) - return get_uprobe(u); + node = rb_find_add(>rb_node, _tree, __uprobe_cmp); + if (node) + return get_uprobe(__node_2_uprobe(node)); - if (match < 0) - p = >rb_left; - else - p = >rb_right; - - } - - u = NULL; - rb_link_node(>rb_node, parent, p); - rb_insert_color(>rb_node, _tree); /* get access + creation ref */ refcount_set(>ref, 2); - - return u; + return NULL; } /*
[tip: sched/core] static_call/x86: Add __static_call_return0()
The following commit has been merged into the sched/core branch of tip: Commit-ID: 3f2a8fc4b15de18644e8a80a09edda168676e22c Gitweb: https://git.kernel.org/tip/3f2a8fc4b15de18644e8a80a09edda168676e22c Author: Peter Zijlstra AuthorDate: Mon, 18 Jan 2021 15:12:16 +01:00 Committer: Ingo Molnar CommitterDate: Wed, 17 Feb 2021 14:08:43 +01:00 static_call/x86: Add __static_call_return0() Provide a stub function that returns 0 and wire up the static call site patching to replace the CALL with a single 5 byte instruction that clears %RAX, the return value register. The function can be cast to any function pointer type that has a single %RAX return (including pointers). Also provide a version that returns an int for convenience. We are clearing the entire %RAX register in any case, whether the return value is 32 or 64 bits, since %RAX is always a scratch register anyway. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20210118141223.123667-2-frede...@kernel.org --- arch/x86/kernel/static_call.c | 17 +++-- include/linux/static_call.h | 12 kernel/static_call.c | 5 + 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index ca9a380..9442c41 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,14 +11,26 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ }; +/* + * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax + * The REX.W cancels the effect of any data16.
+ */ +static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 }; + static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) { + const void *emulate = NULL; int size = CALL_INSN_SIZE; const void *code; switch (type) { case CALL: code = text_gen_insn(CALL_INSN_OPCODE, insn, func); + if (func == &__static_call_return0) { + emulate = code; + code = &xor5rax; + } + break; case NOP: @@ -41,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void if (unlikely(system_state == SYSTEM_BOOTING)) return text_poke_early(insn, code, size); - text_poke_bp(insn, code, size, NULL); + text_poke_bp(insn, code, size, emulate); } static void __static_call_validate(void *insn, bool tail) @@ -54,7 +66,8 @@ static void __static_call_validate(void *insn, bool tail) return; } else { if (opcode == CALL_INSN_OPCODE || - !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5)) + !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) || + !memcmp(insn, xor5rax, 5)) return; } diff --git a/include/linux/static_call.h b/include/linux/static_call.h index a2c0645..bd6735d 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -142,6 +142,8 @@ extern void __static_call_update(struct static_call_key *key, void *tramp, void extern int static_call_mod_init(struct module *mod); extern int static_call_text_reserved(void *start, void *end); +extern long __static_call_return0(void); + #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = {\ @@ -206,6 +208,11 @@ static inline int static_call_text_reserved(void *start, void *end) return 0; } +static inline long __static_call_return0(void) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) @@ -222,6 +229,11 @@ struct static_call_key { void *func; }; +static inline long __static_call_return0(void) +{ + return 0; +} + #define DEFINE_STATIC_CALL(name, _func) \
DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = {\ diff --git a/kernel/static_call.c b/kernel/static_call.c index 84565c2..0bc11b5 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -438,6 +438,11 @@ int __init static_call_init(void) } early_initcall(static_call_init); +long __static_call_return0(void) +{ + return 0; +} + #ifdef CONFIG_STATIC_CALL_SELFTEST static int func_a(int x)