[tip: sched/core] sched/debug: Rename the sched_debug parameter to sched_verbose

2021-04-17 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 9406415f46f6127fd31bb66f0260f7a61a8d2786
Gitweb:
https://git.kernel.org/tip/9406415f46f6127fd31bb66f0260f7a61a8d2786
Author:Peter Zijlstra 
AuthorDate:Thu, 15 Apr 2021 18:23:17 +02:00
Committer: Peter Zijlstra 
CommitterDate: Sat, 17 Apr 2021 13:22:44 +02:00

sched/debug: Rename the sched_debug parameter to sched_verbose

CONFIG_SCHED_DEBUG is the build-time Kconfig knob; the sched_debug boot
param and the /debug/sched/debug_enabled knob control the
sched_debug_enabled variable, but what they really do is make
SCHED_DEBUG more verbose, so rename the lot.
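
As a usage note, a minimal userspace sketch for flipping the renamed
knob at runtime (the path comes from the diff below; the program itself
is illustrative, not part of the patch):

  #include <stdio.h>

  int main(void)
  {
          FILE *f = fopen("/sys/kernel/debug/sched/verbose", "w");

          if (!f) {
                  perror("fopen");        /* needs debugfs mounted and root */
                  return 1;
          }
          fputs("1\n", f);                /* same effect as booting with sched_verbose */
          fclose(f);
          return 0;
  }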

Signed-off-by: Peter Zijlstra (Intel) 
---
 Documentation/admin-guide/kernel-parameters.txt |  2 +-
 Documentation/scheduler/sched-domains.rst   | 10 +-
 kernel/sched/debug.c|  4 ++--
 kernel/sched/sched.h|  2 +-
 kernel/sched/topology.c | 12 ++--
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 0454572..9e4c026 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4725,7 +4725,7 @@
 
sbni=   [NET] Granch SBNI12 leased line adapter
 
-   sched_debug [KNL] Enables verbose scheduler debug messages.
+   sched_verbose   [KNL] Enables verbose scheduler debug messages.
 
schedstats= [KNL,X86] Enable or disable scheduled statistics.
Allowed values are enable and disable. This feature
diff --git a/Documentation/scheduler/sched-domains.rst 
b/Documentation/scheduler/sched-domains.rst
index 8582fa5..14ea2f2 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -74,8 +74,8 @@ for a given topology level by creating a 
sched_domain_topology_level array and
 calling set_sched_topology() with this array as the parameter.
 
 The sched-domains debugging infrastructure can be enabled by enabling
-CONFIG_SCHED_DEBUG and adding 'sched_debug' to your cmdline. If you forgot to
-tweak your cmdline, you can also flip the /sys/kernel/debug/sched_debug
-knob. This enables an error checking parse of the sched domains which should
-catch most possible errors (described above). It also prints out the domain
-structure in a visual format.
+CONFIG_SCHED_DEBUG and adding 'sched_verbose' to your cmdline. If you
+forgot to tweak your cmdline, you can also flip the
+/sys/kernel/debug/sched/verbose knob. This enables an error checking parse of
+the sched domains which should catch most possible errors (described above). It
+also prints out the domain structure in a visual format.
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index bf199d6..461342f 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -275,7 +275,7 @@ static const struct file_operations sched_dynamic_fops = {
 
 #endif /* CONFIG_PREEMPT_DYNAMIC */
 
-__read_mostly bool sched_debug_enabled;
+__read_mostly bool sched_debug_verbose;
 
 static const struct seq_operations sched_debug_sops;
 
@@ -300,7 +300,7 @@ static __init int sched_init_debug(void)
debugfs_sched = debugfs_create_dir("sched", NULL);
 
debugfs_create_file("features", 0644, debugfs_sched, NULL, 
_feat_fops);
-   debugfs_create_bool("debug_enabled", 0644, debugfs_sched, 
_debug_enabled);
+   debugfs_create_bool("verbose", 0644, debugfs_sched, 
_debug_verbose);
 #ifdef CONFIG_PREEMPT_DYNAMIC
debugfs_create_file("preempt", 0644, debugfs_sched, NULL, 
_dynamic_fops);
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 55232db..bde7248 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2363,7 +2363,7 @@ extern struct sched_entity *__pick_first_entity(struct 
cfs_rq *cfs_rq);
 extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
 
 #ifdef CONFIG_SCHED_DEBUG
-extern bool sched_debug_enabled;
+extern bool sched_debug_verbose;
 
 extern void print_cfs_stats(struct seq_file *m, int cpu);
 extern void print_rt_stats(struct seq_file *m, int cpu);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index c343aed..55a0a24 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -14,15 +14,15 @@ static cpumask_var_t sched_domains_tmpmask2;
 
 static int __init sched_debug_setup(char *str)
 {
-   sched_debug_enabled = true;
+   sched_debug_verbose = true;
 
return 0;
 }
-early_param("sched_debug", sched_debug_setup);
+early_param("sched_verbose", sched_debug_setup);
 
 static inline bool sched_debug(void)
 {
-   return sched_debug_enabled;
+   return sched_debug_verbose;
 }
 
 #define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = 
#_name },
@@ -131,7 +131,7 @@ static void 

[tip: sched/core] sched: Use cpu_dying() to fix balance_push vs hotplug-rollback

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: b5c4477366fb5e6a2f0f38742c33acd666c07698
Gitweb:
https://git.kernel.org/tip/b5c4477366fb5e6a2f0f38742c33acd666c07698
Author:Peter Zijlstra 
AuthorDate:Thu, 21 Jan 2021 16:09:32 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:32 +02:00

sched: Use cpu_dying() to fix balance_push vs hotplug-rollback

Use the new cpu_dying() state to simplify and fix the balance_push()
vs CPU hotplug rollback state.

Specifically, we currently rely on the sched_cpu_dying() /
sched_cpu_activate() notifiers to terminate balance_push; however, if
cpu_down() fails when we're past sched_cpu_deactivate(), it should
terminate balance_push at that point and not wait until we hit
sched_cpu_activate().

Similarly, when cpu_up() fails and we're going back down, balance_push
should be active, where it currently is not.

So instead, make sure balance_push is enabled below SCHED_AP_ACTIVE
(when !cpu_active()), and gate its utility with cpu_dying().
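
In miniature, the resulting gating looks like this (a condensed sketch
of the hunks below, not the verbatim kernel code):

  static void balance_push(struct rq *rq)
  {
          /* Installed whenever the CPU is !active, so it persists across
           * a failed cpu_up() rolling back down. */
          rq->balance_callback = &balance_push_callback;

          /* But only push tasks away while the hotplug motion is down. */
          if (!cpu_dying(rq->cpu))
                  return;

          /* ... migrate per-cpu kthreads / remaining tasks off this CPU ... */
  }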

Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/yhgayef83vqhk...@hirez.programming.kicks-ass.net
---
 kernel/sched/core.c  | 26 +++---
 kernel/sched/sched.h |  1 -
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 95bd6ab..7d031da 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1811,7 +1811,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, 
int cpu)
return cpu_online(cpu);
 
/* Regular kernel threads don't get to stay during offline. */
-   if (cpu_rq(cpu)->balance_push)
+   if (cpu_dying(cpu))
return false;
 
/* But are allowed during online. */
@@ -7638,6 +7638,9 @@ static DEFINE_PER_CPU(struct cpu_stop_work, push_work);
 
 /*
  * Ensure we only run per-cpu kthreads once the CPU goes !active.
+ *
+ * This is enabled below SCHED_AP_ACTIVE; when !cpu_active(), but only
+ * effective when the hotplug motion is down.
  */
 static void balance_push(struct rq *rq)
 {
@@ -7645,12 +7648,19 @@ static void balance_push(struct rq *rq)
 
	lockdep_assert_held(&rq->lock);
SCHED_WARN_ON(rq->cpu != smp_processor_id());
+
/*
 * Ensure the thing is persistent until balance_push_set(.on = false);
 */
	rq->balance_callback = &balance_push_callback;
 
/*
+* Only active while going offline.
+*/
+   if (!cpu_dying(rq->cpu))
+   return;
+
+   /*
 * Both the cpu-hotplug and stop task are in this case and are
 * required to complete the hotplug process.
 *
@@ -7703,7 +7713,6 @@ static void balance_push_set(int cpu, bool on)
struct rq_flags rf;
 
	rq_lock_irqsave(rq, &rf);
-   rq->balance_push = on;
if (on) {
WARN_ON_ONCE(rq->balance_callback);
		rq->balance_callback = &balance_push_callback;
@@ -7828,8 +7837,8 @@ int sched_cpu_activate(unsigned int cpu)
struct rq_flags rf;
 
/*
-* Make sure that when the hotplug state machine does a roll-back
-* we clear balance_push. Ideally that would happen earlier...
+* Clear the balance_push callback and prepare to schedule
+* regular tasks.
 */
balance_push_set(cpu, false);
 
@@ -8014,12 +8023,6 @@ int sched_cpu_dying(unsigned int cpu)
}
	rq_unlock_irqrestore(rq, &rf);
 
-   /*
-* Now that the CPU is offline, make sure we're welcome
-* to new tasks once we come back up.
-*/
-   balance_push_set(cpu, false);
-
calc_load_migrate(rq);
update_max_interval();
hrtick_clear(rq);
@@ -8204,7 +8207,7 @@ void __init sched_init(void)
rq->sd = NULL;
rq->rd = NULL;
rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
-   rq->balance_callback = NULL;
+		rq->balance_callback = &balance_push_callback;
rq->active_balance = 0;
rq->next_balance = jiffies;
rq->push_cpu = 0;
@@ -8251,6 +8254,7 @@ void __init sched_init(void)
 
 #ifdef CONFIG_SMP
idle_thread_set_boot_cpu();
+   balance_push_set(smp_processor_id(), false);
 #endif
init_sched_fair_class();
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cbb0b01..7e7e936 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -983,7 +983,6 @@ struct rq {
unsigned long   cpu_capacity_orig;
 
struct callback_head*balance_callback;
-   unsigned char   balance_push;
 
unsigned char   nohz_idle_balance;
unsigned char   idle_balance;


[tip: sched/core] cpumask: Make cpu_{online,possible,present,active}() inline

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: b02a4fd8148f655095d9e3d6eddd8f0042bcc27c
Gitweb:
https://git.kernel.org/tip/b02a4fd8148f655095d9e3d6eddd8f0042bcc27c
Author:Peter Zijlstra 
AuthorDate:Mon, 25 Jan 2021 16:46:49 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:32 +02:00

cpumask: Make cpu_{online,possible,present,active}() inline

Prepare for the addition of another mask. This is primarily code
movement, to avoid having to create more #ifdefs, but while there,
convert everything that takes an argument into an inline function.
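
The shape of the conversion for one helper (both forms condensed from
the diff below; they cannot, of course, coexist in one header):

  /* Old: a macro -- no prototype, no type checking of 'cpu'. */
  #define cpu_online(cpu)         cpumask_test_cpu((cpu), cpu_online_mask)

  /* New: a real function -- 'cpu' is type-checked, the symbol is visible
   * to tooling, and it still inlines to the same code. */
  static inline bool cpu_online(unsigned int cpu)
  {
          return cpumask_test_cpu(cpu, cpu_online_mask);
  }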

Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210310150109.045447...@infradead.org
---
 include/linux/cpumask.h | 97 +++-
 1 file changed, 66 insertions(+), 31 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 383684e..a584336 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -98,37 +98,6 @@ extern struct cpumask __cpu_active_mask;
 
 extern atomic_t __num_online_cpus;
 
-#if NR_CPUS > 1
-/**
- * num_online_cpus() - Read the number of online CPUs
- *
- * Despite the fact that __num_online_cpus is of type atomic_t, this
- * interface gives only a momentary snapshot and is not protected against
- * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
- * region.
- */
-static inline unsigned int num_online_cpus(void)
-{
-   return atomic_read(&__num_online_cpus);
-}
-#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
-#define num_present_cpus()	cpumask_weight(cpu_present_mask)
-#define num_active_cpus()	cpumask_weight(cpu_active_mask)
-#define cpu_online(cpu)		cpumask_test_cpu((cpu), cpu_online_mask)
-#define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
-#define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
-#define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)
-#else
-#define num_online_cpus()	1U
-#define num_possible_cpus()	1U
-#define num_present_cpus()	1U
-#define num_active_cpus()	1U
-#define cpu_online(cpu)		((cpu) == 0)
-#define cpu_possible(cpu)	((cpu) == 0)
-#define cpu_present(cpu)	((cpu) == 0)
-#define cpu_active(cpu)		((cpu) == 0)
-#endif
-
 extern cpumask_t cpus_booted_once_mask;
 
 static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
@@ -894,6 +863,72 @@ static inline const struct cpumask *get_cpu_mask(unsigned 
int cpu)
return to_cpumask(p);
 }
 
+#if NR_CPUS > 1
+/**
+ * num_online_cpus() - Read the number of online CPUs
+ *
+ * Despite the fact that __num_online_cpus is of type atomic_t, this
+ * interface gives only a momentary snapshot and is not protected against
+ * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
+ * region.
+ */
+static inline unsigned int num_online_cpus(void)
+{
+   return atomic_read(&__num_online_cpus);
+}
+#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
+#define num_present_cpus()	cpumask_weight(cpu_present_mask)
+#define num_active_cpus()	cpumask_weight(cpu_active_mask)
+
+static inline bool cpu_online(unsigned int cpu)
+{
+   return cpumask_test_cpu(cpu, cpu_online_mask);
+}
+
+static inline bool cpu_possible(unsigned int cpu)
+{
+   return cpumask_test_cpu(cpu, cpu_possible_mask);
+}
+
+static inline bool cpu_present(unsigned int cpu)
+{
+   return cpumask_test_cpu(cpu, cpu_present_mask);
+}
+
+static inline bool cpu_active(unsigned int cpu)
+{
+   return cpumask_test_cpu(cpu, cpu_active_mask);
+}
+
+#else
+
+#define num_online_cpus()	1U
+#define num_possible_cpus()	1U
+#define num_present_cpus()	1U
+#define num_active_cpus()	1U
+
+static inline bool cpu_online(unsigned int cpu)
+{
+   return cpu == 0;
+}
+
+static inline bool cpu_possible(unsigned int cpu)
+{
+   return cpu == 0;
+}
+
+static inline bool cpu_present(unsigned int cpu)
+{
+   return cpu == 0;
+}
+
+static inline bool cpu_active(unsigned int cpu)
+{
+   return cpu == 0;
+}
+
+#endif /* NR_CPUS > 1 */
+
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
 
 #if NR_CPUS <= BITS_PER_LONG


[tip: sched/core] cpumask: Introduce DYING mask

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: e40f74c535b8a0ecf3ef0388b51a34cdadb34fb5
Gitweb:
https://git.kernel.org/tip/e40f74c535b8a0ecf3ef0388b51a34cdadb34fb5
Author:Peter Zijlstra 
AuthorDate:Tue, 19 Jan 2021 18:43:45 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:32 +02:00

cpumask: Introduce DYING mask

Introduce a cpumask that indicates (for each CPU) what direction the
CPU hotplug is currently going. Notably, it tracks rollbacks. E.g.
when an up fails and we do a roll-back down, it will accurately reflect
the direction.
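
A standalone userspace model of the direction tracking (hypothetical
code purely to show the rollback behaviour; the kernel uses a
struct cpumask, not an unsigned long):

  #include <stdbool.h>
  #include <stdio.h>

  static unsigned long cpu_dying_mask;

  static void set_cpu_dying(unsigned int cpu, bool dying)
  {
          if (dying)
                  cpu_dying_mask |= 1UL << cpu;
          else
                  cpu_dying_mask &= ~(1UL << cpu);
  }

  static bool cpu_dying(unsigned int cpu)
  {
          return cpu_dying_mask & (1UL << cpu);
  }

  int main(void)
  {
          set_cpu_dying(1, true);         /* cpu_down(1) starts: going down */
          printf("down:     %d\n", cpu_dying(1));         /* 1 */

          set_cpu_dying(1, false);        /* a step fails: rollback, going up */
          printf("rollback: %d\n", cpu_dying(1));         /* 0 */
          return 0;
  }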

Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210310150109.151441...@infradead.org
---
 include/linux/cpumask.h | 20 
 kernel/cpu.c|  6 ++
 2 files changed, 26 insertions(+)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index a584336..e6b948a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -91,10 +91,12 @@ extern struct cpumask __cpu_possible_mask;
 extern struct cpumask __cpu_online_mask;
 extern struct cpumask __cpu_present_mask;
 extern struct cpumask __cpu_active_mask;
+extern struct cpumask __cpu_dying_mask;
 #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
 #define cpu_online_mask   ((const struct cpumask *)&__cpu_online_mask)
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
+#define cpu_dying_mask    ((const struct cpumask *)&__cpu_dying_mask)
 
 extern atomic_t __num_online_cpus;
 
@@ -826,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active)
cpumask_clear_cpu(cpu, &__cpu_active_mask);
 }
 
+static inline void
+set_cpu_dying(unsigned int cpu, bool dying)
+{
+   if (dying)
+   cpumask_set_cpu(cpu, &__cpu_dying_mask);
+   else
+   cpumask_clear_cpu(cpu, &__cpu_dying_mask);
+}
 
 /**
  * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
@@ -900,6 +910,11 @@ static inline bool cpu_active(unsigned int cpu)
return cpumask_test_cpu(cpu, cpu_active_mask);
 }
 
+static inline bool cpu_dying(unsigned int cpu)
+{
+   return cpumask_test_cpu(cpu, cpu_dying_mask);
+}
+
 #else
 
 #define num_online_cpus()  1U
@@ -927,6 +942,11 @@ static inline bool cpu_active(unsigned int cpu)
return cpu == 0;
 }
 
+static inline bool cpu_dying(unsigned int cpu)
+{
+   return false;
+}
+
 #endif /* NR_CPUS > 1 */
 
 #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 23505d6..838dcf2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -160,6 +160,9 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum 
cpuhp_state state,
int (*cb)(unsigned int cpu);
int ret, cnt;
 
+   if (cpu_dying(cpu) != !bringup)
+   set_cpu_dying(cpu, !bringup);
+
if (st->fail == state) {
st->fail = CPUHP_INVALID;
return -EAGAIN;
@@ -2512,6 +2515,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+struct cpumask __cpu_dying_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_dying_mask);
+
 atomic_t __num_online_cpus __read_mostly;
 EXPORT_SYMBOL(__num_online_cpus);
 


[tip: sched/core] sched: Move SCHED_DEBUG sysctl to debugfs

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 8a99b6833c884fa0e7919030d93fecedc69fc625
Gitweb:
https://git.kernel.org/tip/8a99b6833c884fa0e7919030d93fecedc69fc625
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Mar 2021 11:43:21 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:34 +02:00

sched: Move SCHED_DEBUG sysctl to debugfs

Stop polluting sysctl with undocumented knobs that really are debug
only; move them all to /debug/sched/, alongside the /debug/sched_*
files that already exist.
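
The replacement pattern, sketched (condensed from the diff below;
debugfs_create_dir(), debugfs_create_u32() and debugfs_create_bool()
are the stock debugfs helpers):

  static struct dentry *debugfs_sched;

  static __init int sched_init_debug(void)
  {
          debugfs_sched = debugfs_create_dir("sched", NULL);

          /* formerly /proc/sys/kernel/sched_nr_migrate et al. */
          debugfs_create_u32("nr_migrate", 0644, debugfs_sched,
                             &sysctl_sched_nr_migrate);
          debugfs_create_bool("debug_enabled", 0644, debugfs_sched,
                              &sched_debug_enabled);
          return 0;
  }
  late_initcall(sched_init_debug);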

Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Greg Kroah-Hartman 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210412102001.287610...@infradead.org
---
 include/linux/sched/sysctl.h |  8 +--
 kernel/sched/core.c  |  4 +-
 kernel/sched/debug.c | 74 +--
 kernel/sched/fair.c  |  9 +---
 kernel/sched/sched.h |  2 +-
 kernel/sysctl.c  | 96 +---
 6 files changed, 80 insertions(+), 113 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 3c31ba8..0a3f346 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, 
int write,
 enum { sysctl_hung_task_timeout_secs = 0 };
 #endif
 
+extern unsigned int sysctl_sched_child_runs_first;
+
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_child_runs_first;
 
 enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE,
@@ -37,7 +38,7 @@ enum sched_tunable_scaling {
SCHED_TUNABLESCALING_LINEAR,
SCHED_TUNABLESCALING_END,
 };
-extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+extern unsigned int sysctl_sched_tunable_scaling;
 
 extern unsigned int sysctl_numa_balancing_scan_delay;
 extern unsigned int sysctl_numa_balancing_scan_period_min;
@@ -47,9 +48,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 #ifdef CONFIG_SCHED_DEBUG
 extern __read_mostly unsigned int sysctl_sched_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
-
-int sched_proc_update_handler(struct ctl_table *table, int write,
-   void *buffer, size_t *length, loff_t *ppos);
 #endif
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7d031da..bac30db 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5504,9 +5504,11 @@ static const struct file_operations sched_dynamic_fops = 
{
.release= single_release,
 };
 
+extern struct dentry *debugfs_sched;
+
 static __init int sched_init_debug_dynamic(void)
 {
-   debugfs_create_file("sched_preempt", 0644, NULL, NULL, 
_dynamic_fops);
+   debugfs_create_file("sched_preempt", 0644, debugfs_sched, NULL, 
_dynamic_fops);
return 0;
 }
 late_initcall(sched_init_debug_dynamic);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 4b49cc2..2093b90 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -169,15 +169,81 @@ static const struct file_operations sched_feat_fops = {
.release= single_release,
 };
 
+#ifdef CONFIG_SMP
+
+static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
+  size_t cnt, loff_t *ppos)
+{
+   char buf[16];
+
+   if (cnt > 15)
+   cnt = 15;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+   return -EFAULT;
+
+	if (kstrtouint(buf, 10, &sysctl_sched_tunable_scaling))
+   return -EINVAL;
+
+   if (sched_update_scaling())
+   return -EINVAL;
+
+   *ppos += cnt;
+   return cnt;
+}
+
+static int sched_scaling_show(struct seq_file *m, void *v)
+{
+   seq_printf(m, "%d\n", sysctl_sched_tunable_scaling);
+   return 0;
+}
+
+static int sched_scaling_open(struct inode *inode, struct file *filp)
+{
+   return single_open(filp, sched_scaling_show, NULL);
+}
+
+static const struct file_operations sched_scaling_fops = {
+   .open   = sched_scaling_open,
+   .write  = sched_scaling_write,
+   .read   = seq_read,
+   .llseek = seq_lseek,
+   .release= single_release,
+};
+
+#endif /* SMP */
+
 __read_mostly bool sched_debug_enabled;
 
+struct dentry *debugfs_sched;
+
 static __init int sched_init_debug(void)
 {
-   debugfs_create_file("sched_features", 0644, NULL, NULL,
-			    &sched_feat_fops);
+   struct dentry __maybe_unused *numa;
 
-   debugfs_create_bool("sched_debug", 0644, NULL,
-			    &sched_debug_enabled);
+   debugfs_sched = debugfs_create_dir("sched", NULL);
+
+   debugfs_create_file("features", 0644, debugfs_sched, NULL, 
_feat_fops);
+   debugfs_create_bool("debug_enabled", 0644, 

[tip: sched/core] sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 1d1c2509de4488cc58c924d0a6117c62de1d4f9c
Gitweb:
https://git.kernel.org/tip/1d1c2509de4488cc58c924d0a6117c62de1d4f9c
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Mar 2021 19:47:43 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:33 +02:00

sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG

CONFIG_SCHEDSTATS does not depend on SCHED_DEBUG, so it is inconsistent
to have the sysctl depend on it.

Suggested-by: Mel Gorman 
Signed-off-by: Peter Zijlstra (Intel) 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210412102001.161151...@infradead.org
---
 kernel/sysctl.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8042098..17f1cc9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1711,17 +1711,6 @@ static struct ctl_table kern_table[] = {
.mode   = 0644,
.proc_handler   = proc_dointvec,
},
-#ifdef CONFIG_SCHEDSTATS
-   {
-   .procname   = "sched_schedstats",
-   .data   = NULL,
-   .maxlen = sizeof(unsigned int),
-   .mode   = 0644,
-   .proc_handler   = sysctl_schedstats,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#endif /* CONFIG_SCHEDSTATS */
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_NUMA_BALANCING
{
@@ -1755,6 +1744,17 @@ static struct ctl_table kern_table[] = {
},
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
+#ifdef CONFIG_SCHEDSTATS
+   {
+   .procname   = "sched_schedstats",
+   .data   = NULL,
+   .maxlen = sizeof(unsigned int),
+   .mode   = 0644,
+   .proc_handler   = sysctl_schedstats,
+   .extra1 = SYSCTL_ZERO,
+   .extra2 = SYSCTL_ONE,
+   },
+#endif /* CONFIG_SCHEDSTATS */
 #ifdef CONFIG_NUMA_BALANCING
{
.procname   = "numa_balancing",


[tip: sched/core] sched,preempt: Move preempt_dynamic to debug.c

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 1011dcce99f8026d48fdd7b9cc259e32a8b472be
Gitweb:
https://git.kernel.org/tip/1011dcce99f8026d48fdd7b9cc259e32a8b472be
Author:Peter Zijlstra 
AuthorDate:Thu, 25 Mar 2021 12:21:38 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:34 +02:00

sched,preempt: Move preempt_dynamic to debug.c

Move the #ifdef SCHED_DEBUG bits to kernel/sched/debug.c in order to
collect all the debugfs bits.

Signed-off-by: Peter Zijlstra (Intel) 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210412102001.353833...@infradead.org
---
 kernel/sched/core.c  | 77 +--
 kernel/sched/debug.c | 67 -
 kernel/sched/sched.h | 11 --
 3 files changed, 78 insertions(+), 77 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bac30db..e6c714b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5371,9 +5371,9 @@ enum {
preempt_dynamic_full,
 };
 
-static int preempt_dynamic_mode = preempt_dynamic_full;
+int preempt_dynamic_mode = preempt_dynamic_full;
 
-static int sched_dynamic_mode(const char *str)
+int sched_dynamic_mode(const char *str)
 {
if (!strcmp(str, "none"))
return preempt_dynamic_none;
@@ -5387,7 +5387,7 @@ static int sched_dynamic_mode(const char *str)
return -EINVAL;
 }
 
-static void sched_dynamic_update(int mode)
+void sched_dynamic_update(int mode)
 {
/*
 * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
@@ -5444,79 +5444,8 @@ static int __init setup_preempt_mode(char *str)
 }
 __setup("preempt=", setup_preempt_mode);
 
-#ifdef CONFIG_SCHED_DEBUG
-
-static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
-  size_t cnt, loff_t *ppos)
-{
-   char buf[16];
-   int mode;
-
-   if (cnt > 15)
-   cnt = 15;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-   return -EFAULT;
-
-   buf[cnt] = 0;
-   mode = sched_dynamic_mode(strstrip(buf));
-   if (mode < 0)
-   return mode;
-
-   sched_dynamic_update(mode);
-
-   *ppos += cnt;
-
-   return cnt;
-}
-
-static int sched_dynamic_show(struct seq_file *m, void *v)
-{
-   static const char * preempt_modes[] = {
-   "none", "voluntary", "full"
-   };
-   int i;
-
-   for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
-   if (preempt_dynamic_mode == i)
-   seq_puts(m, "(");
-   seq_puts(m, preempt_modes[i]);
-   if (preempt_dynamic_mode == i)
-   seq_puts(m, ")");
-
-   seq_puts(m, " ");
-   }
-
-   seq_puts(m, "\n");
-   return 0;
-}
-
-static int sched_dynamic_open(struct inode *inode, struct file *filp)
-{
-   return single_open(filp, sched_dynamic_show, NULL);
-}
-
-static const struct file_operations sched_dynamic_fops = {
-   .open   = sched_dynamic_open,
-   .write  = sched_dynamic_write,
-   .read   = seq_read,
-   .llseek = seq_lseek,
-   .release= single_release,
-};
-
-extern struct dentry *debugfs_sched;
-
-static __init int sched_init_debug_dynamic(void)
-{
-   debugfs_create_file("sched_preempt", 0644, debugfs_sched, NULL, 
_dynamic_fops);
-   return 0;
-}
-late_initcall(sched_init_debug_dynamic);
-
-#endif /* CONFIG_SCHED_DEBUG */
 #endif /* CONFIG_PREEMPT_DYNAMIC */
 
-
 /*
  * This is the entry point to schedule() from kernel preemption
  * off of irq context.
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2093b90..bdd344f 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -213,9 +213,71 @@ static const struct file_operations sched_scaling_fops = {
 
 #endif /* SMP */
 
+#ifdef CONFIG_PREEMPT_DYNAMIC
+
+static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
+  size_t cnt, loff_t *ppos)
+{
+   char buf[16];
+   int mode;
+
+   if (cnt > 15)
+   cnt = 15;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+   return -EFAULT;
+
+   buf[cnt] = 0;
+   mode = sched_dynamic_mode(strstrip(buf));
+   if (mode < 0)
+   return mode;
+
+   sched_dynamic_update(mode);
+
+   *ppos += cnt;
+
+   return cnt;
+}
+
+static int sched_dynamic_show(struct seq_file *m, void *v)
+{
+   static const char * preempt_modes[] = {
+   "none", "voluntary", "full"
+   };
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
+   if (preempt_dynamic_mode == i)
+   seq_puts(m, "(");
+   seq_puts(m, preempt_modes[i]);
+   if (preempt_dynamic_mode == i)
+   seq_puts(m, ")");
+
+   seq_puts(m, " 

[tip: sched/core] sched: Don't make LATENCYTOP select SCHED_DEBUG

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: d86ba831656611872e4939b895503ddac63d8196
Gitweb:
https://git.kernel.org/tip/d86ba831656611872e4939b895503ddac63d8196
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Mar 2021 19:48:34 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:33 +02:00

sched: Don't make LATENCYTOP select SCHED_DEBUG

SCHED_DEBUG is not in fact required for LATENCYTOP, don't select it.

Suggested-by: Mel Gorman 
Signed-off-by: Peter Zijlstra (Intel) 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210412102001.224578...@infradead.org
---
 lib/Kconfig.debug | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2779c29..5f98376 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1670,7 +1670,6 @@ config LATENCYTOP
select KALLSYMS_ALL
select STACKTRACE
select SCHEDSTATS
-   select SCHED_DEBUG
help
  Enable this option if you want to use the LatencyTOP tool
  to find out which userspace is blocking on what kernel operations.


[tip: sched/core] sched,fair: Alternative sched_slice()

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 0c2de3f054a59f15e01804b75a04355c48de628c
Gitweb:
https://git.kernel.org/tip/0c2de3f054a59f15e01804b75a04355c48de628c
Author:Peter Zijlstra 
AuthorDate:Thu, 25 Mar 2021 13:44:46 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00

sched,fair: Alternative sched_slice()

The current sched_slice() seems to have issues; there are two things
that could be improved:

 - the 'nr_running' used for __sched_period() is daft when cgroups are
   considered. Using the RQ-wide h_nr_running seems like a much more
   consistent number.

 - (especially) cgroups can slice it real fine, which makes for easy
   over-scheduling; ensure min_gran is what the name says (see the
   sketch below).
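
With made-up numbers, the BASE_SLICE clamp amounts to this (userspace
sketch; 0.75ms is only an assumed min_granularity, not a value taken
from the patch):

  #include <stdio.h>

  typedef unsigned long long u64;

  int main(void)
  {
          const u64 min_gran = 750000ULL; /* assumed sysctl_sched_min_granularity, ns */
          u64 slice = 200000ULL;          /* a deep cgroup sliced the period this fine */

          if (slice < min_gran)           /* BASE_SLICE: never hand out less */
                  slice = min_gran;

          printf("slice = %llu ns\n", slice);     /* 750000 */
          return 0;
  }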

Signed-off-by: Peter Zijlstra (Intel) 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210412102001.611897...@infradead.org
---
 kernel/sched/fair.c | 12 +++-
 kernel/sched/features.h |  3 +++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b3ea14c..49636a4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -687,7 +687,13 @@ static u64 __sched_period(unsigned long nr_running)
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-   u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
+   unsigned int nr_running = cfs_rq->nr_running;
+   u64 slice;
+
+   if (sched_feat(ALT_PERIOD))
+   nr_running = rq_of(cfs_rq)->cfs.h_nr_running;
+
+   slice = __sched_period(nr_running + !se->on_rq);
 
for_each_sched_entity(se) {
struct load_weight *load;
@@ -704,6 +710,10 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct 
sched_entity *se)
}
slice = __calc_delta(slice, se->load.weight, load);
}
+
+   if (sched_feat(BASE_SLICE))
+   slice = max(slice, (u64)sysctl_sched_min_granularity);
+
return slice;
 }
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 422fa68..011c5ec 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -90,3 +90,6 @@ SCHED_FEAT(WA_BIAS, true)
  */
 SCHED_FEAT(UTIL_EST, true)
 SCHED_FEAT(UTIL_EST_FASTUP, true)
+
+SCHED_FEAT(ALT_PERIOD, true)
+SCHED_FEAT(BASE_SLICE, true)


[tip: sched/core] sched/debug: Rename the sched_debug parameter to sched_verbose

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: a1b93fc0377e73dd54f819a993f83291324bb54a
Gitweb:
https://git.kernel.org/tip/a1b93fc0377e73dd54f819a993f83291324bb54a
Author:Peter Zijlstra 
AuthorDate:Thu, 15 Apr 2021 18:23:17 +02:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00

sched/debug: Rename the sched_debug parameter to sched_verbose

CONFIG_SCHED_DEBUG is the build-time Kconfig knob; the sched_debug boot
param and the /debug/sched/debug_enabled knob control the
sched_debug_enabled variable, but what they really do is make
SCHED_DEBUG more verbose, so rename the lot.

Signed-off-by: Peter Zijlstra (Intel) 
---
 Documentation/admin-guide/kernel-parameters.txt |  2 +-
 Documentation/scheduler/sched-domains.rst   | 10 +-
 kernel/sched/debug.c|  4 ++--
 kernel/sched/topology.c | 12 ++--
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 0454572..9e4c026 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4725,7 +4725,7 @@
 
sbni=   [NET] Granch SBNI12 leased line adapter
 
-   sched_debug [KNL] Enables verbose scheduler debug messages.
+   sched_verbose   [KNL] Enables verbose scheduler debug messages.
 
schedstats= [KNL,X86] Enable or disable scheduled statistics.
Allowed values are enable and disable. This feature
diff --git a/Documentation/scheduler/sched-domains.rst 
b/Documentation/scheduler/sched-domains.rst
index 8582fa5..14ea2f2 100644
--- a/Documentation/scheduler/sched-domains.rst
+++ b/Documentation/scheduler/sched-domains.rst
@@ -74,8 +74,8 @@ for a given topology level by creating a 
sched_domain_topology_level array and
 calling set_sched_topology() with this array as the parameter.
 
 The sched-domains debugging infrastructure can be enabled by enabling
-CONFIG_SCHED_DEBUG and adding 'sched_debug' to your cmdline. If you forgot to
-tweak your cmdline, you can also flip the /sys/kernel/debug/sched_debug
-knob. This enables an error checking parse of the sched domains which should
-catch most possible errors (described above). It also prints out the domain
-structure in a visual format.
+CONFIG_SCHED_DEBUG and adding 'sched_verbose' to your cmdline. If you
+forgot to tweak your cmdline, you can also flip the
+/sys/kernel/debug/sched/verbose knob. This enables an error checking parse of
+the sched domains which should catch most possible errors (described above). It
+also prints out the domain structure in a visual format.
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index bf199d6..461342f 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -275,7 +275,7 @@ static const struct file_operations sched_dynamic_fops = {
 
 #endif /* CONFIG_PREEMPT_DYNAMIC */
 
-__read_mostly bool sched_debug_enabled;
+__read_mostly bool sched_debug_verbose;
 
 static const struct seq_operations sched_debug_sops;
 
@@ -300,7 +300,7 @@ static __init int sched_init_debug(void)
debugfs_sched = debugfs_create_dir("sched", NULL);
 
debugfs_create_file("features", 0644, debugfs_sched, NULL, 
_feat_fops);
-   debugfs_create_bool("debug_enabled", 0644, debugfs_sched, 
_debug_enabled);
+   debugfs_create_bool("verbose", 0644, debugfs_sched, 
_debug_verbose);
 #ifdef CONFIG_PREEMPT_DYNAMIC
debugfs_create_file("preempt", 0644, debugfs_sched, NULL, 
_dynamic_fops);
 #endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index c343aed..55a0a24 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -14,15 +14,15 @@ static cpumask_var_t sched_domains_tmpmask2;
 
 static int __init sched_debug_setup(char *str)
 {
-   sched_debug_enabled = true;
+   sched_debug_verbose = true;
 
return 0;
 }
-early_param("sched_debug", sched_debug_setup);
+early_param("sched_verbose", sched_debug_setup);
 
 static inline bool sched_debug(void)
 {
-   return sched_debug_enabled;
+   return sched_debug_verbose;
 }
 
 #define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = 
#_name },
@@ -131,7 +131,7 @@ static void sched_domain_debug(struct sched_domain *sd, int 
cpu)
 {
int level = 0;
 
-   if (!sched_debug_enabled)
+   if (!sched_debug_verbose)
return;
 
if (!sd) {
@@ -152,7 +152,7 @@ static void sched_domain_debug(struct sched_domain *sd, int 
cpu)
 }
 #else /* !CONFIG_SCHED_DEBUG */
 
-# define sched_debug_enabled 0
+# define sched_debug_verbose 0
 # define sched_domain_debug(sd, cpu) do { } while (0)
 static inline bool sched_debug(void)
 {
@@ -2141,7 +2141,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct 
sched_domain_attr *att
if 

[tip: sched/core] sched,debug: Convert sysctl sched_domains to debugfs

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 3b87f136f8fccddf7da016ab7d04bb3cf9b180f0
Gitweb:
https://git.kernel.org/tip/3b87f136f8fccddf7da016ab7d04bb3cf9b180f0
Author:Peter Zijlstra 
AuthorDate:Thu, 25 Mar 2021 11:31:20 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00

sched,debug: Convert sysctl sched_domains to debugfs

Stop polluting sysctl, move to debugfs for SCHED_DEBUG stuff.

Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Dietmar Eggemann 
Reviewed-by: Valentin Schneider 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/YHgB/s4kcbq1i...@hirez.programming.kicks-ass.net
---
 kernel/sched/debug.c| 254 ---
 kernel/sched/sched.h|  10 +--
 kernel/sched/topology.c |   6 +-
 3 files changed, 59 insertions(+), 211 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index bdd344f..b25de7b 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -299,6 +299,10 @@ static __init int sched_init_debug(void)
debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, 
_scaling_fops);
debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, 
_sched_migration_cost);
debugfs_create_u32("nr_migrate", 0644, debugfs_sched, 
_sched_nr_migrate);
+
+   mutex_lock(_domains_mutex);
+   update_sched_domain_debugfs();
+   mutex_unlock(_domains_mutex);
 #endif
 
 #ifdef CONFIG_NUMA_BALANCING
@@ -316,229 +320,88 @@ late_initcall(sched_init_debug);
 
 #ifdef CONFIG_SMP
 
-#ifdef CONFIG_SYSCTL
-
-static struct ctl_table sd_ctl_dir[] = {
-   {
-   .procname   = "sched_domain",
-   .mode   = 0555,
-   },
-   {}
-};
-
-static struct ctl_table sd_ctl_root[] = {
-   {
-   .procname   = "kernel",
-   .mode   = 0555,
-   .child  = sd_ctl_dir,
-   },
-   {}
-};
-
-static struct ctl_table *sd_alloc_ctl_entry(int n)
-{
-   struct ctl_table *entry =
-   kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
-
-   return entry;
-}
-
-static void sd_free_ctl_entry(struct ctl_table **tablep)
-{
-   struct ctl_table *entry;
-
-   /*
-* In the intermediate directories, both the child directory and
-* procname are dynamically allocated and could fail but the mode
-* will always be set. In the lowest directory the names are
-* static strings and all have proc handlers.
-*/
-   for (entry = *tablep; entry->mode; entry++) {
-   if (entry->child)
-			sd_free_ctl_entry(&entry->child);
-   if (entry->proc_handler == NULL)
-   kfree(entry->procname);
-   }
-
-   kfree(*tablep);
-   *tablep = NULL;
-}
-
-static void
-set_table_entry(struct ctl_table *entry,
-   const char *procname, void *data, int maxlen,
-   umode_t mode, proc_handler *proc_handler)
-{
-   entry->procname = procname;
-   entry->data = data;
-   entry->maxlen = maxlen;
-   entry->mode = mode;
-   entry->proc_handler = proc_handler;
-}
+static cpumask_var_t   sd_sysctl_cpus;
+static struct dentry   *sd_dentry;
 
-static int sd_ctl_doflags(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static int sd_flags_show(struct seq_file *m, void *v)
 {
-   unsigned long flags = *(unsigned long *)table->data;
-   size_t data_size = 0;
-   size_t len = 0;
-   char *tmp, *buf;
+   unsigned long flags = *(unsigned int *)m->private;
int idx;
 
-   if (write)
-   return 0;
-
-	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
-   char *name = sd_flag_debug[idx].name;
-
-   /* Name plus whitespace */
-   data_size += strlen(name) + 1;
-   }
-
-   if (*ppos > data_size) {
-   *lenp = 0;
-   return 0;
-   }
-
-   buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
-   if (!buf)
-   return -ENOMEM;
-
 	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
-   char *name = sd_flag_debug[idx].name;
-
-   len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
-   }
-
-   tmp = buf + *ppos;
-   len -= *ppos;
-
-   if (len > *lenp)
-   len = *lenp;
-   if (len)
-   memcpy(buffer, tmp, len);
-   if (len < *lenp) {
-   ((char *)buffer)[len] = '\n';
-   len++;
+   seq_puts(m, sd_flag_debug[idx].name);
+   seq_puts(m, " ");
}
-
-   *lenp = len;
-   *ppos += len;
-
-   kfree(buf);
+   seq_puts(m, "\n");
 
return 0;
 }
 
-static struct ctl_table *
-sd_alloc_ctl_domain_table(struct sched_domain *sd)
-{
-   struct ctl_table *table = 

[tip: sched/core] debugfs: Implement debugfs_create_str()

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 9af0440ec86ebdab075e1b3d231f81fe7decb575
Gitweb:
https://git.kernel.org/tip/9af0440ec86ebdab075e1b3d231f81fe7decb575
Author:Peter Zijlstra 
AuthorDate:Thu, 25 Mar 2021 10:53:55 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:34 +02:00

debugfs: Implement debugfs_create_str()

Implement debugfs_create_str() to easily display names and such in
debugfs.
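
Typical use, sketched (the directory and string are hypothetical; the
signature matches the header change below):

  #include <linux/debugfs.h>

  static char *my_name = "example";
  static struct dentry *my_dir;

  static int __init my_init(void)
  {
          my_dir = debugfs_create_dir("mydrv", NULL);
          /* cat /sys/kernel/debug/mydrv/name  ->  example */
          debugfs_create_str("name", 0444, my_dir, &my_name);
          return 0;
  }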

Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Greg Kroah-Hartman 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210412102001.415407...@infradead.org
---
 fs/debugfs/file.c   | 91 -
 include/linux/debugfs.h | 17 +++-
 2 files changed, 108 insertions(+)

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 686e0ad..9b78e9e 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -865,6 +865,97 @@ struct dentry *debugfs_create_bool(const char *name, 
umode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_bool);
 
+ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+   struct dentry *dentry = F_DENTRY(file);
+   char *str, *copy = NULL;
+   int copy_len, len;
+   ssize_t ret;
+
+   ret = debugfs_file_get(dentry);
+   if (unlikely(ret))
+   return ret;
+
+   str = *(char **)file->private_data;
+   len = strlen(str) + 1;
+   copy = kmalloc(len, GFP_KERNEL);
+   if (!copy) {
+   debugfs_file_put(dentry);
+   return -ENOMEM;
+   }
+
+   copy_len = strscpy(copy, str, len);
+   debugfs_file_put(dentry);
+   if (copy_len < 0) {
+   kfree(copy);
+   return copy_len;
+   }
+
+   copy[copy_len] = '\n';
+
+   ret = simple_read_from_buffer(user_buf, count, ppos, copy, copy_len);
+   kfree(copy);
+
+   return ret;
+}
+
+static ssize_t debugfs_write_file_str(struct file *file, const char __user 
*user_buf,
+ size_t count, loff_t *ppos)
+{
+   /* This is really only for read-only strings */
+   return -EINVAL;
+}
+
+static const struct file_operations fops_str = {
+   .read = debugfs_read_file_str,
+	.write =	debugfs_write_file_str,
+   .open = simple_open,
+   .llseek =   default_llseek,
+};
+
+static const struct file_operations fops_str_ro = {
+   .read = debugfs_read_file_str,
+   .open = simple_open,
+   .llseek =   default_llseek,
+};
+
+static const struct file_operations fops_str_wo = {
+	.write =	debugfs_write_file_str,
+   .open = simple_open,
+   .llseek =   default_llseek,
+};
+
+/**
+ * debugfs_create_str - create a debugfs file that is used to read and write a 
string value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *  directory dentry if set.  If this parameter is %NULL, then the
+ *  file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read to and write
+ * from.
+ *
+ * This function creates a file in debugfs with the given name that
+ * contains the value of the variable @value.  If the @mode variable is so
+ * set, it can be read from, and written to.
+ *
+ * This function will return a pointer to a dentry if it succeeds.  This
+ * pointer must be passed to the debugfs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.)  If an error occurs, ERR_PTR(-ERROR) will be
+ * returned.
+ *
+ * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will
+ * be returned.
+ */
+void debugfs_create_str(const char *name, umode_t mode,
+   struct dentry *parent, char **value)
+{
+	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str,
+				   &fops_str_ro, &fops_str_wo);
+}
+
 static ssize_t read_file_blob(struct file *file, char __user *user_buf,
  size_t count, loff_t *ppos)
 {
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index d6c4cc9..1fdb434 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode,
 struct dentry *parent, atomic_t *value);
 struct dentry *debugfs_create_bool(const char *name, umode_t mode,
  struct dentry *parent, bool *value);
+void debugfs_create_str(const char *name, umode_t mode,
+   struct dentry *parent, char **value);
 
 struct dentry *debugfs_create_blob(const 

[tip: sched/core] sched: Move /proc/sched_debug to debugfs

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: d27e9ae2f244805bbdc730d85fba28685d2471e5
Gitweb:
https://git.kernel.org/tip/d27e9ae2f244805bbdc730d85fba28685d2471e5
Author:Peter Zijlstra 
AuthorDate:Thu, 25 Mar 2021 15:18:19 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 17:06:35 +02:00

sched: Move /proc/sched_debug to debugfs

Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Greg Kroah-Hartman 
Tested-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210412102001.548833...@infradead.org
---
 kernel/sched/debug.c | 25 -
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index b25de7b..bf199d6 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -277,6 +277,20 @@ static const struct file_operations sched_dynamic_fops = {
 
 __read_mostly bool sched_debug_enabled;
 
+static const struct seq_operations sched_debug_sops;
+
+static int sched_debug_open(struct inode *inode, struct file *filp)
+{
	return seq_open(filp, &sched_debug_sops);
+}
+
+static const struct file_operations sched_debug_fops = {
+   .open   = sched_debug_open,
+   .read   = seq_read,
+   .llseek = seq_lseek,
+   .release= seq_release,
+};
+
 static struct dentry *debugfs_sched;
 
 static __init int sched_init_debug(void)
@@ -314,6 +328,8 @@ static __init int sched_init_debug(void)
debugfs_create_u32("scan_size_mb", 0644, numa, 
_numa_balancing_scan_size);
 #endif
 
+   debugfs_create_file("debug", 0444, debugfs_sched, NULL, 
_debug_fops);
+
return 0;
 }
 late_initcall(sched_init_debug);
@@ -847,15 +863,6 @@ static const struct seq_operations sched_debug_sops = {
.show   = sched_debug_show,
 };
 
-static int __init init_sched_debug_procfs(void)
-{
-   if (!proc_create_seq("sched_debug", 0444, NULL, _debug_sops))
-   return -ENOMEM;
-   return 0;
-}
-
-__initcall(init_sched_debug_procfs);
-
 #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
 #define __P(F) __PS(#F, F)
 #define   P(F) __PS(#F, p->F)


[tip: perf/core] perf: Rework perf_event_exit_event()

2021-04-16 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the perf/core branch of tip:

Commit-ID: ef54c1a476aef7eef26fe13ea10dc090952c00f8
Gitweb:
https://git.kernel.org/tip/ef54c1a476aef7eef26fe13ea10dc090952c00f8
Author:Peter Zijlstra 
AuthorDate:Thu, 08 Apr 2021 12:35:56 +02:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 16 Apr 2021 16:32:40 +02:00

perf: Rework perf_event_exit_event()

Make perf_event_exit_event() more robust, such that we can use it from
other contexts. Specifically, the upcoming remove_on_exec.

For this to work we need to address a few issues. Remove_on_exec will
not destroy the entire context, so we cannot rely on TASK_TOMBSTONE to
disable event_function_call() and we thus have to use
perf_remove_from_context().

When using perf_remove_from_context(), there are two races to consider.
The first is against close(), where we can have concurrent tear-down
of the event. The second is against child_list iteration, which should
not find a half-baked event.

To address this, teach perf_remove_from_context() to special case
!ctx->is_active and about DETACH_CHILD.
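
The resulting removal path, condensed (a sketch of the
perf_remove_from_context() hunk below):

  raw_spin_lock_irq(&ctx->lock);
  if (!ctx->is_active) {
          /* Inactive context: no IPI needed (or possible, see
           * TASK_TOMBSTONE); do the removal in place. */
          __perf_remove_from_context(event, __get_cpu_context(ctx),
                                     ctx, (void *)flags);
          raw_spin_unlock_irq(&ctx->lock);
          return;
  }
  raw_spin_unlock_irq(&ctx->lock);

  /* Active context: the usual cross-CPU event_function_call(). */
  event_function_call(event, __perf_remove_from_context, (void *)flags);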

[ el...@google.com: fix racing parent/child exit in sync_child_event(). ]
Signed-off-by: Marco Elver 
Signed-off-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20210408103605.1676875-2-el...@google.com
---
 include/linux/perf_event.h |   1 +-
 kernel/events/core.c   | 142 
 2 files changed, 80 insertions(+), 63 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3f7f89e..3d478ab 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -607,6 +607,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK_DATA  0x08
 #define PERF_ATTACH_ITRACE 0x10
 #define PERF_ATTACH_SCHED_CB   0x20
+#define PERF_ATTACH_CHILD  0x40
 
 struct perf_cgroup;
 struct perf_buffer;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f079431..318ff7b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2205,6 +2205,26 @@ out:
perf_event__header_size(leader);
 }
 
+static void sync_child_event(struct perf_event *child_event);
+
+static void perf_child_detach(struct perf_event *event)
+{
+   struct perf_event *parent_event = event->parent;
+
+   if (!(event->attach_state & PERF_ATTACH_CHILD))
+   return;
+
+   event->attach_state &= ~PERF_ATTACH_CHILD;
+
+   if (WARN_ON_ONCE(!parent_event))
+   return;
+
	lockdep_assert_held(&parent_event->child_mutex);
+
+   sync_child_event(event);
	list_del_init(&event->child_list);
+}
+
 static bool is_orphaned_event(struct perf_event *event)
 {
return event->state == PERF_EVENT_STATE_DEAD;
@@ -2312,6 +2332,7 @@ group_sched_out(struct perf_event *group_event,
 }
 
 #define DETACH_GROUP   0x01UL
+#define DETACH_CHILD   0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -2335,6 +2356,8 @@ __perf_remove_from_context(struct perf_event *event,
event_sched_out(event, cpuctx, ctx);
if (flags & DETACH_GROUP)
perf_group_detach(event);
+   if (flags & DETACH_CHILD)
+   perf_child_detach(event);
list_del_event(event, ctx);
 
if (!ctx->nr_events && ctx->is_active) {
@@ -2363,25 +2386,21 @@ static void perf_remove_from_context(struct perf_event 
*event, unsigned long fla
 
	lockdep_assert_held(&ctx->mutex);
 
-   event_function_call(event, __perf_remove_from_context, (void *)flags);
-
/*
-* The above event_function_call() can NO-OP when it hits
-* TASK_TOMBSTONE. In that case we must already have been detached
-* from the context (by perf_event_exit_event()) but the grouping
-* might still be in-tact.
+* Because of perf_event_exit_task(), perf_remove_from_context() ought
+* to work in the face of TASK_TOMBSTONE, unlike every other
+* event_function_call() user.
 */
-   WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
-   if ((flags & DETACH_GROUP) &&
-   (event->attach_state & PERF_ATTACH_GROUP)) {
-   /*
-* Since in that case we cannot possibly be scheduled, simply
-* detach now.
-*/
-		raw_spin_lock_irq(&ctx->lock);
-		perf_group_detach(event);
+	raw_spin_lock_irq(&ctx->lock);
+	if (!ctx->is_active) {
+		__perf_remove_from_context(event, __get_cpu_context(ctx),
+					   ctx, (void *)flags);
 		raw_spin_unlock_irq(&ctx->lock);
+		return;
 	}
+	raw_spin_unlock_irq(&ctx->lock);
+
+   event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
@@ -12377,14 +12396,17 @@ void perf_pmu_migrate_context(struct pmu *pmu, int 
src_cpu, int dst_cpu)
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
-static void sync_child_event(struct perf_event *child_event,
-   

[tip: locking/core] static_call: Relax static_call_update() function argument type

2021-04-09 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/core branch of tip:

Commit-ID: 9432bbd969c667fc9c4b1c140c5a745ff2a7b540
Gitweb:
https://git.kernel.org/tip/9432bbd969c667fc9c4b1c140c5a745ff2a7b540
Author:Peter Zijlstra 
AuthorDate:Tue, 23 Mar 2021 16:49:03 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 09 Apr 2021 13:22:12 +02:00

static_call: Relax static_call_update() function argument type

static_call_update() had stronger type requirements than regular C;
relax them to match. Instead of requiring that the @func argument have
the exact matching type, allow any type which C is willing to promote
to the right (function) pointer type. Specifically this allows (void *)
arguments.

This cleans up a bunch of static_call_update() callers for
PREEMPT_DYNAMIC and should get around silly GCC11 warnings for free.
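
Before/after at one call site (condensed from the kernel/sched/core.c
hunk below):

  /* Before: the cast had to reproduce the exact function-pointer type. */
  static_call_update(might_resched,
                     (typeof(&__cond_resched))__static_call_return0);

  /* After: any pointer C will promote, including void *, or plain NULL. */
  static_call_update(might_resched, (void *)&__static_call_return0);
  static_call_update(preempt_schedule, NULL);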

Signed-off-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/yfon7ncl8ofgt...@hirez.programming.kicks-ass.net
---
 include/linux/static_call.h |  4 ++--
 kernel/sched/core.c | 18 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index 85ecc78..8d50f62 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -113,9 +113,9 @@ extern void arch_static_call_transform(void *site, void 
*tramp, void *func, bool
 
 #define static_call_update(name, func) \
 ({ \
-	BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name)));	\
+	typeof(&STATIC_CALL_TRAMP(name)) __F = (func);			\
 	__static_call_update(&STATIC_CALL_KEY(name),			\
-			     STATIC_CALL_TRAMP_ADDR(name), func);	\
+			     STATIC_CALL_TRAMP_ADDR(name), __F);	\
 })
 
 #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9819121..67f9890 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5396,25 +5396,25 @@ static void sched_dynamic_update(int mode)
switch (mode) {
case preempt_dynamic_none:
static_call_update(cond_resched, __cond_resched);
-		static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0);
-		static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL);
-		static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL);
-		static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL);
+		static_call_update(might_resched, (void *)&__static_call_return0);
+   static_call_update(preempt_schedule, NULL);
+   static_call_update(preempt_schedule_notrace, NULL);
+   static_call_update(irqentry_exit_cond_resched, NULL);
pr_info("Dynamic Preempt: none\n");
break;
 
case preempt_dynamic_voluntary:
static_call_update(cond_resched, __cond_resched);
static_call_update(might_resched, __cond_resched);
-		static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL);
-		static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL);
-		static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL);
+   static_call_update(preempt_schedule, NULL);
+   static_call_update(preempt_schedule_notrace, NULL);
+   static_call_update(irqentry_exit_cond_resched, NULL);
pr_info("Dynamic Preempt: voluntary\n");
break;
 
case preempt_dynamic_full:
-		static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0);
-		static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0);
+		static_call_update(cond_resched, (void *)&__static_call_return0);
+		static_call_update(might_resched, (void *)&__static_call_return0);
static_call_update(preempt_schedule, __preempt_schedule_func);
 		static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func);
 		static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched);


[tip: x86/core] x86/cpu: Resort and comment Intel models

2021-04-08 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 53375a5a218e7ea0ac18087946b5391f749b764f
Gitweb:
https://git.kernel.org/tip/53375a5a218e7ea0ac18087946b5391f749b764f
Author:Peter Zijlstra 
AuthorDate:Mon, 15 Mar 2021 17:12:53 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 08 Apr 2021 14:22:10 +02:00

x86/cpu: Resort and comment Intel models

The INTEL_FAM6 list has become a mess again. Try to bring some sanity
back into it.

Where previously we had one microarch per year and a number of SKUs
within that, this no longer seems to be the case. We now get different
uarch names that share a 'core' design.

Add the core name starting at skylake and reorder to keep the cores
in chronological order. Furthermore, Intel marketed the names {Amber,
Coffee, Whiskey} Lake, but those are in fact steppings of Kaby Lake, add
comments for them.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/ye+hhs8i0gshh...@hirez.programming.kicks-ass.net
---
 arch/x86/include/asm/intel-family.h | 50 +++-
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/intel-family.h 
b/arch/x86/include/asm/intel-family.h
index 9abe842..b15262f 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -32,7 +32,9 @@
  * _EP - 2 socket server parts
  * _EX - 4+ socket server parts
  *
- * The #define line may optionally include a comment including platform names.
+ * The #define line may optionally include a comment including platform or core
+ * names. An exception is made for kabylake where steppings seem to have gotten
+ * their own names :-(
  */
 
 /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
@@ -69,35 +71,39 @@
 #define INTEL_FAM6_BROADWELL_X 0x4F
 #define INTEL_FAM6_BROADWELL_D 0x56
 
-#define INTEL_FAM6_SKYLAKE_L		0x4E
-#define INTEL_FAM6_SKYLAKE		0x5E
-#define INTEL_FAM6_SKYLAKE_X		0x55
-#define INTEL_FAM6_KABYLAKE_L		0x8E
-#define INTEL_FAM6_KABYLAKE		0x9E
+#define INTEL_FAM6_SKYLAKE_L		0x4E	/* Sky Lake */
+#define INTEL_FAM6_SKYLAKE		0x5E	/* Sky Lake */
+#define INTEL_FAM6_SKYLAKE_X		0x55	/* Sky Lake */
 
-#define INTEL_FAM6_CANNONLAKE_L		0x66
+#define INTEL_FAM6_KABYLAKE_L		0x8E	/* Sky Lake */
+/* AMBERLAKE_L			0x8E	   Sky Lake -- s: 9 */
+/* COFFEELAKE_L			0x8E	   Sky Lake -- s: 10 */
+/* WHISKEYLAKE_L		0x8E	   Sky Lake -- s: 11,12 */
 
-#define INTEL_FAM6_ICELAKE_X		0x6A
-#define INTEL_FAM6_ICELAKE_D		0x6C
-#define INTEL_FAM6_ICELAKE		0x7D
-#define INTEL_FAM6_ICELAKE_L		0x7E
-#define INTEL_FAM6_ICELAKE_NNPI		0x9D
+#define INTEL_FAM6_KABYLAKE		0x9E	/* Sky Lake */
+/* COFFEELAKE			0x9E	   Sky Lake -- s: 10-13 */
 
-#define INTEL_FAM6_TIGERLAKE_L		0x8C
-#define INTEL_FAM6_TIGERLAKE		0x8D
+#define INTEL_FAM6_COMETLAKE		0xA5	/* Sky Lake */
+#define INTEL_FAM6_COMETLAKE_L		0xA6	/* Sky Lake */
 
-#define INTEL_FAM6_COMETLAKE		0xA5
-#define INTEL_FAM6_COMETLAKE_L		0xA6
+#define INTEL_FAM6_CANNONLAKE_L		0x66	/* Palm Cove */
 
-#define INTEL_FAM6_ROCKETLAKE		0xA7
+#define INTEL_FAM6_ICELAKE_X		0x6A	/* Sunny Cove */
+#define INTEL_FAM6_ICELAKE_D		0x6C	/* Sunny Cove */
+#define INTEL_FAM6_ICELAKE		0x7D	/* Sunny Cove */
+#define INTEL_FAM6_ICELAKE_L		0x7E	/* Sunny Cove */
+#define INTEL_FAM6_ICELAKE_NNPI		0x9D	/* Sunny Cove */
 
-#define INTEL_FAM6_SAPPHIRERAPIDS_X	0x8F
+#define INTEL_FAM6_LAKEFIELD		0x8A	/* Sunny Cove / Tremont */
 
-/* Hybrid Core/Atom Processors */
+#define INTEL_FAM6_ROCKETLAKE		0xA7	/* Cypress Cove */
 
-#define	INTEL_FAM6_LAKEFIELD		0x8A
-#define INTEL_FAM6_ALDERLAKE		0x97
-#define INTEL_FAM6_ALDERLAKE_L		0x9A
+#define INTEL_FAM6_TIGERLAKE_L		0x8C	/* Willow Cove */
+#define INTEL_FAM6_TIGERLAKE		0x8D	/* Willow Cove */
+#define INTEL_FAM6_SAPPHIRERAPIDS_X	0x8F	/* Willow Cove */
+
+#define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
+#define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
 
 /* "Small Core" Processors (Atom) */
 


[tip: x86/core] x86/retpoline: Simplify retpolines

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 119251855f9adf9421cb5eb409933092141ab2c7
Gitweb:
https://git.kernel.org/tip/119251855f9adf9421cb5eb409933092141ab2c7
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:02 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:42:04 +02:00

x86/retpoline: Simplify retpolines

Due to:

  c9c324dc22aa ("objtool: Support stack layout changes in alternatives")

it is now possible to simplify the retpolines.

Currently our retpolines consist of 2 symbols:

 - __x86_indirect_thunk_\reg: the compiler target
 - __x86_retpoline_\reg:  the actual retpoline.

Both are consecutive in code and aligned such that for any one register
they both live in the same cacheline:

0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0   jmpq   *%rax
   2:   90  nop
   3:   90  nop
   4:   90  nop

0000000000000005 <__x86_retpoline_rax>:
   5:   e8 07 00 00 00  callq  11 <__x86_retpoline_rax+0xc>
   a:   f3 90   pause
   c:   0f ae e8lfence
   f:   eb f9                   jmp    a <__x86_retpoline_rax+0x5>
  11:   48 89 04 24 mov%rax,(%rsp)
  15:   c3  retq
  16:   66 2e 0f 1f 84 00 00 00 00 00   nopw   %cs:0x0(%rax,%rax,1)

The thunk is an alternative_2, where one option is a JMP to the
retpoline. This was done so that objtool didn't need to deal with
alternatives with stack ops. But that problem has been solved, so now
it is possible to fold the entire retpoline into the alternative to
simplify and consolidate unused bytes:

0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0   jmpq   *%rax
   2:   90  nop
   3:   90  nop
   4:   90  nop
   5:   90  nop
   6:   90  nop
   7:   90  nop
   8:   90  nop
   9:   90  nop
   a:   90  nop
   b:   90  nop
   c:   90  nop
   d:   90  nop
   e:   90  nop
   f:   90  nop
  10:   90  nop
  11:   66 66 2e 0f 1f 84 00 00 00 00 00        data16 nopw %cs:0x0(%rax,%rax,1)
  1c:   0f 1f 40 00 nopl   0x0(%rax)

Notice that since the longest alternative sequence is now:

   0:   e8 07 00 00 00  callq  c <.altinstr_replacement+0xc>
   5:   f3 90   pause
   7:   0f ae e8lfence
   a:   eb f9                   jmp    5 <.altinstr_replacement+0x5>
   c:   48 89 04 24 mov%rax,(%rsp)
  10:   c3  retq

17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. (IOW, if
we can shrink the retpoline by 1 byte we can pack it more densely).

 [ bp: Massage commit message. ]

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Link: https://lkml.kernel.org/r/20210326151259.506071...@infradead.org
---
 arch/x86/include/asm/asm-prototypes.h |  7 +-
 arch/x86/include/asm/nospec-branch.h  |  6 ++---
 arch/x86/lib/retpoline.S  | 34 +-
 tools/objtool/check.c |  3 +--
 4 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/asm-prototypes.h 
b/arch/x86/include/asm/asm-prototypes.h
index 51e2bf2..0545b07 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void);
 #define DECL_INDIRECT_THUNK(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
 
-#define DECL_RETPOLINE(reg) \
-   extern asmlinkage void __x86_retpoline_ ## reg (void);
-
 #undef GEN
 #define GEN(reg) DECL_INDIRECT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
-#undef GEN
-#define GEN(reg) DECL_RETPOLINE(reg)
-#include <asm/GEN-for-each-reg.h>
-
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h 
b/arch/x86/include/asm/nospec-branch.h
index 529f8e9..664be73 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -80,7 +80,7 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
-		      __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
 		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
 #else
jmp *%\reg
@@ -90,7 +90,7 @@
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
-		      __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+		      __stringify(call __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \

[tip: x86/core] x86/alternatives: Optimize optimize_nops()

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 23c1ad538f4f371bdb67d8a112314842d5db7e5a
Gitweb:
https://git.kernel.org/tip/23c1ad538f4f371bdb67d8a112314842d5db7e5a
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:01 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:41:17 +02:00

x86/alternatives: Optimize optimize_nops()

Currently, optimize_nops() scans to see if the alternative starts with
NOPs. However, the emit pattern is:

  141:  \oldinstr
  142:  .skip (len-(142b-141b)), 0x90

That is, when 'oldinstr' is short, the tail is padded with NOPs. This case
never gets optimized.

Rewrite optimize_nops() to replace any trailing string of NOPs inside
the alternative to larger NOPs. Also run it irrespective of patching,
replacing NOPs in both the original and replaced code.

A direct consequence is that 'padlen' becomes superfluous, so remove it.
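
The idea, as a standalone sketch (not the kernel's exact implementation,
which also recognizes multi-byte NOPs and patches the result in place):

	/* Count the trailing run of 1-byte NOPs (0x90) in a patch site. */
	static int trailing_nops(const unsigned char *instr, int len)
	{
		int nops = 0;

		while (nops < len && instr[len - nops - 1] == 0x90)
			nops++;

		return nops;
	}

Running this over both the original and the replacement code is what lets
the padding tail of a short 'oldinstr' be rewritten into longer NOPs too.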

 [ bp:
   - Adjust commit message
   - remove a stale comment about needing to pad
   - add a comment in optimize_nops()
   - exit early if the NOP verif. loop catches a mismatch - function
     should not add NOPs in that case
   - fix the "optimized NOPs" offsets output ]

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Link: https://lkml.kernel.org/r/20210326151259.442992...@infradead.org
---
 arch/x86/include/asm/alternative.h| 17 +-
 arch/x86/kernel/alternative.c | 49 +++---
 tools/objtool/arch/x86/include/arch/special.h |  2 +-
 3 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 17b3609..a3c2315 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,7 +65,6 @@ struct alt_instr {
u16 cpuid;  /* cpuid bit set for replacement */
u8  instrlen;   /* length of original instruction */
u8  replacementlen; /* length of new instruction */
-   u8  padlen; /* length of build-time padding */
 } __packed;
 
 /*
@@ -104,7 +103,6 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 
 #define alt_end_marker "663"
 #define alt_slen   "662b-661b"
-#define alt_pad_len	alt_end_marker"b-662b"
 #define alt_total_slen alt_end_marker"b-661b"
 #define alt_rlen(num)  e_replacement(num)"f-"b_replacement(num)"f"
 
@@ -151,8 +149,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
" .long " b_replacement(num)"f - .\n"   /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
" .byte " alt_total_slen "\n"   /* source len  */ \
-   " .byte " alt_rlen(num) "\n"/* replacement len */ \
-   " .byte " alt_pad_len "\n"  /* pad len */
+   " .byte " alt_rlen(num) "\n"/* replacement len */
 
 #define ALTINSTR_REPLACEMENT(newinstr, num)	/* replacement */	\
 	"# ALT: replacement " #num "\n"					\
@@ -224,9 +221,6 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
  * Peculiarities:
  * No memory clobber here.
  * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
  * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)   \
@@ -315,13 +309,12 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+.macro altinstruction_entry orig alt feature orig_len alt_len
.long \orig - .
.long \alt - .
.word \feature
.byte \orig_len
.byte \alt_len
-   .byte \pad_len
 .endm
 
 /*
@@ -338,7 +331,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 142:
 
.pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+   altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
.popsection
 
.pushsection .altinstr_replacement,"ax"
@@ -375,8 +368,8 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 142:
 
.pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-   altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+   altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f

[tip: x86/core] objtool: Handle per arch retpoline naming

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 530b4ddd9dd92b263081f5c7786d39a8129c8b2d
Gitweb:
https://git.kernel.org/tip/530b4ddd9dd92b263081f5c7786d39a8129c8b2d
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:04 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:43:02 +02:00

objtool: Handle per arch retpoline naming

The __x86_indirect_ naming is obviously not generic. Shorten to allow
matching some additional magic names later.
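
Concretely, the 15-character prefix match below covers both the existing
thunks and the __x86_indirect_alt_* symbols that a later patch in this
series introduces:

	__x86_indirect_thunk_rax	/* matches */
	__x86_indirect_alt_call_rax	/* matches; added later in the series */
	__x86_indirect_alt_jmp_rax	/* matches; added later in the series */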

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.630296...@infradead.org
---
 tools/objtool/arch/x86/decode.c  |  5 +
 tools/objtool/check.c|  9 +++--
 tools/objtool/include/objtool/arch.h |  2 ++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index ba9ebff..782894e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -648,3 +648,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 
sp_reg)
 
return 0;
 }
+
+bool arch_is_retpoline(struct symbol *sym)
+{
+   return !strncmp(sym->name, "__x86_indirect_", 15);
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 519af4b..6fbc001 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -850,6 +850,11 @@ static int add_ignore_alternatives(struct objtool_file 
*file)
return 0;
 }
 
+__weak bool arch_is_retpoline(struct symbol *sym)
+{
+   return false;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -872,7 +877,7 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (reloc->sym->type == STT_SECTION) {
dest_sec = reloc->sym->sec;
dest_off = arch_dest_reloc_offset(reloc->addend);
-	} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+   } else if (arch_is_retpoline(reloc->sym)) {
/*
 * Retpoline jumps are really dynamic jumps in
 * disguise, so convert them accordingly.
@@ -1026,7 +1031,7 @@ static int add_call_destinations(struct objtool_file 
*file)
return -1;
}
 
-	} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+   } else if (arch_is_retpoline(reloc->sym)) {
/*
 * Retpoline calls are really dynamic calls in
 * disguise, so convert them accordingly.
diff --git a/tools/objtool/include/objtool/arch.h 
b/tools/objtool/include/objtool/arch.h
index 6ff0685..bb30993 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -86,4 +86,6 @@ const char *arch_nop_insn(int len);
 
 int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
 
+bool arch_is_retpoline(struct symbol *sym);
+
 #endif /* _ARCH_H */


[tip: x86/core] objtool: Correctly handle retpoline thunk calls

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: bcb1b6ff39da7e8a6a986eb08126fba2b5e13c32
Gitweb:
https://git.kernel.org/tip/bcb1b6ff39da7e8a6a986eb08126fba2b5e13c32
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:03 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:42:54 +02:00

objtool: Correctly handle retpoline thunk calls

Just like JMP handling, convert a direct CALL to a retpoline thunk
into a retpoline safe indirect CALL.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.567568...@infradead.org
---
 tools/objtool/check.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d45f018..519af4b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1025,6 +1025,18 @@ static int add_call_destinations(struct objtool_file 
*file)
  dest_off);
return -1;
}
+
+	} else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+   /*
+* Retpoline calls are really dynamic calls in
+* disguise, so convert them accordingly.
+*/
+   insn->type = INSN_CALL_DYNAMIC;
+   insn->retpoline_safe = true;
+
+   remove_insn_ops(insn);
+   continue;
+
} else
insn->call_dest = reloc->sym;
 


[tip: x86/core] objtool: Rework the elf_rebuild_reloc_section() logic

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 3a647607b57ad8346e659ddd3b951ac292c83690
Gitweb:
https://git.kernel.org/tip/3a647607b57ad8346e659ddd3b951ac292c83690
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:06 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:43:32 +02:00

objtool: Rework the elf_rebuild_reloc_section() logic

Instead of manually calling elf_rebuild_reloc_section() on sections
we've called elf_add_reloc() on, have elf_write() DTRT.

This makes it easier to add random relocations in places without
carefully tracking when we're done and need to flush what section.
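
In caller terms (a condensed sketch of the pattern removed below; error
handling elided):

	/* before: every site had to remember to flush */
	elf_add_reloc(file->elf, reloc);
	...
	if (elf_rebuild_reloc_section(file->elf, reloc_sec))
		return -1;

	/* after: elf_add_reloc() marks the section changed and
	 * elf_write() rebuilds it when writing the object out */
	elf_add_reloc(file->elf, reloc);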

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.754213...@infradead.org
---
 tools/objtool/check.c   |  6 --
 tools/objtool/elf.c | 20 ++--
 tools/objtool/include/objtool/elf.h |  1 -
 tools/objtool/orc_gen.c |  3 ---
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8618d03..1d0415b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -542,9 +542,6 @@ static int create_static_call_sections(struct objtool_file 
*file)
idx++;
}
 
-   if (elf_rebuild_reloc_section(file->elf, reloc_sec))
-   return -1;
-
return 0;
 }
 
@@ -614,9 +611,6 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
idx++;
}
 
-   if (elf_rebuild_reloc_section(file->elf, reloc_sec))
-   return -1;
-
return 0;
 }
 
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 93fa833..374813e 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, struct reloc *reloc)
 
 	list_add_tail(&reloc->list, &sec->reloc_list);
 	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
+   sec->changed = true;
 }
 
 static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, 
unsigned int *symndx)
@@ -558,7 +560,9 @@ static int read_relocs(struct elf *elf)
return -1;
}
 
-   elf_add_reloc(elf, reloc);
+			list_add_tail(&reloc->list, &sec->reloc_list);
+			elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
nr_reloc++;
}
max_reloc = max(max_reloc, nr_reloc);
@@ -873,14 +877,11 @@ static int elf_rebuild_rela_reloc_section(struct section 
*sec, int nr)
return 0;
 }
 
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
 {
struct reloc *reloc;
int nr;
 
-   sec->changed = true;
-   elf->changed = true;
-
nr = 0;
 	list_for_each_entry(reloc, &sec->reloc_list, list)
nr++;
@@ -944,9 +945,15 @@ int elf_write(struct elf *elf)
struct section *sec;
Elf_Scn *s;
 
-   /* Update section headers for changed sections: */
+   /* Update changed relocation sections and section headers: */
list_for_each_entry(sec, >sections, list) {
if (sec->changed) {
+   if (sec->base &&
+   elf_rebuild_reloc_section(elf, sec)) {
+   WARN("elf_rebuild_reloc_section");
+   return -1;
+   }
+
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
WARN_ELF("elf_getscn");
@@ -958,6 +965,7 @@ int elf_write(struct elf *elf)
}
 
sec->changed = false;
+   elf->changed = true;
}
}
 
diff --git a/tools/objtool/include/objtool/elf.h 
b/tools/objtool/include/objtool/elf.h
index e6890cc..fc576ed 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -142,7 +142,6 @@ struct reloc *find_reloc_by_dest_range(const struct elf 
*elf, struct section *se
 struct symbol *find_func_containing(struct section *sec, unsigned long offset);
 void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
  struct reloc *reloc);
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
 
 #define for_each_sec(file, sec)					\
 	list_for_each_entry(sec, &file->elf->sections, list)
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 738aa50..f534708 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -254,8 +254,5 @@ int orc_create(struct objtool_file *file)
return -1;

[tip: x86/core] objtool: Fix static_call list generation

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: a958c4fea768d2c378c89032ab41d38da2a24422
Gitweb:
https://git.kernel.org/tip/a958c4fea768d2c378c89032ab41d38da2a24422
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:05 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:43:19 +02:00

objtool: Fix static_call list generation

Currently, objtool generates tail call entries in add_jump_destination()
but waits until validate_branch() to generate the regular call entries.
Move these to add_call_destination() for consistency.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.691529...@infradead.org
---
 tools/objtool/check.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6fbc001..8618d03 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1045,6 +1045,11 @@ static int add_call_destinations(struct objtool_file 
*file)
} else
insn->call_dest = reloc->sym;
 
+   if (insn->call_dest && insn->call_dest->static_call_tramp) {
+			list_add_tail(&insn->static_call_node,
+				      &file->static_call_list);
+   }
+
/*
 * Many compilers cannot disable KCOV with a function attribute
 		 * so they need a little help, NOP out any KCOV calls from noinstr
@@ -1788,6 +1793,9 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
 
+   /*
+* Must be before add_{jump_call}_destination.
+*/
ret = read_static_call_tramps(file);
if (ret)
return ret;
@@ -1800,6 +1808,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
 
+   /*
+* Must be before add_call_destination(); it changes INSN_CALL to
+* INSN_JUMP.
+*/
ret = read_intra_function_calls(file);
if (ret)
return ret;
@@ -2762,11 +2774,6 @@ static int validate_branch(struct objtool_file *file, 
struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
 
-		if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
-			list_add_tail(&insn->static_call_node,
-				      &file->static_call_list);
-   }
-
break;
 
case INSN_JUMP_CONDITIONAL:


[tip: x86/core] objtool: Create reloc sections implicitly

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: d0c5c4cc73da0b05b0d9e5f833f2d859e1b45f8e
Gitweb:
https://git.kernel.org/tip/d0c5c4cc73da0b05b0d9e5f833f2d859e1b45f8e
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:08 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:44:37 +02:00

objtool: Create reloc sections implicitly

Have elf_add_reloc() create the relocation section implicitly.
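
In caller terms (a condensed sketch; 'sec', 'off' and 'sym' stand in for
the real arguments):

	/* before: callers had to create the reloc section up front */
	if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
		return -1;
	elf_add_reloc(file->elf, sec, off, R_X86_64_PC32, sym, 0);

	/* after: the first elf_add_reloc() against 'sec' creates it */
	elf_add_reloc(file->elf, sec, off, R_X86_64_PC32, sym, 0);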

Suggested-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.880174...@infradead.org
---
 tools/objtool/check.c   |  6 --
 tools/objtool/elf.c |  9 -
 tools/objtool/include/objtool/elf.h |  1 -
 tools/objtool/orc_gen.c |  2 --
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 61fe29a..600fa67 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -459,9 +459,6 @@ static int create_static_call_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
-   return -1;
-
idx = 0;
 	list_for_each_entry(insn, &file->static_call_list, static_call_node) {
 
@@ -547,9 +544,6 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
-   return -1;
-
idx = 0;
 	list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
 
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 0ab52ac..7b65ae3 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -447,11 +447,18 @@ err:
return -1;
 }
 
+static struct section *elf_create_reloc_section(struct elf *elf,
+   struct section *base,
+   int reltype);
+
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
  unsigned int type, struct symbol *sym, int addend)
 {
struct reloc *reloc;
 
+   if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA))
+   return -1;
+
reloc = malloc(sizeof(*reloc));
if (!reloc) {
perror("malloc");
@@ -829,7 +836,7 @@ static struct section *elf_create_rela_reloc_section(struct 
elf *elf, struct sec
return sec;
 }
 
-struct section *elf_create_reloc_section(struct elf *elf,
+static struct section *elf_create_reloc_section(struct elf *elf,
 struct section *base,
 int reltype)
 {
diff --git a/tools/objtool/include/objtool/elf.h 
b/tools/objtool/include/objtool/elf.h
index 825ad32..463f329 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -122,7 +122,6 @@ static inline u32 reloc_hash(struct reloc *reloc)
 
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
-struct section *elf_create_reloc_section(struct elf *elf, struct section 
*base, int reltype);
 
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
  unsigned int type, struct symbol *sym, int addend);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 1b57be6..dc9b7dd 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -225,8 +225,6 @@ int orc_create(struct objtool_file *file)
sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), 
nr);
if (!sec)
return -1;
-   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
-   return -1;
 
/* Write ORC entries to sections: */
 	list_for_each_entry(entry, &orc_list, list) {


[tip: x86/core] objtool: Add elf_create_reloc() helper

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: ef47cc01cb4abcd760d8ac66b9361d6ade4d0846
Gitweb:
https://git.kernel.org/tip/ef47cc01cb4abcd760d8ac66b9361d6ade4d0846
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:07 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:44:18 +02:00

objtool: Add elf_create_reloc() helper

We have 4 instances of adding a relocation. Create a common helper
to avoid growing even more.
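
The helper takes everything a relocation needs, so the open-coded
malloc()/memset()/field-poking sequences collapse into single calls:

	int elf_add_reloc(struct elf *elf, struct section *sec,
			  unsigned long offset, unsigned int type,
			  struct symbol *sym, int addend);

	/* e.g. the 'key' reloc of a static_call site, from the diff below: */
	if (elf_add_reloc(file->elf, sec,
			  idx * sizeof(struct static_call_site) + 4,
			  R_X86_64_PC32, key_sym,
			  is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
		return -1;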

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.817438...@infradead.org
---
 tools/objtool/check.c   | 78 +
 tools/objtool/elf.c | 86 ++--
 tools/objtool/include/objtool/elf.h | 10 ++-
 tools/objtool/orc_gen.c | 30 ++
 4 files changed, 85 insertions(+), 119 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 1d0415b..61fe29a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -433,8 +433,7 @@ reachable:
 
 static int create_static_call_sections(struct objtool_file *file)
 {
-   struct section *sec, *reloc_sec;
-   struct reloc *reloc;
+   struct section *sec;
struct static_call_site *site;
struct instruction *insn;
struct symbol *key_sym;
@@ -460,8 +459,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-   if (!reloc_sec)
+   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
return -1;
 
idx = 0;
@@ -471,25 +469,11 @@ static int create_static_call_sections(struct 
objtool_file *file)
memset(site, 0, sizeof(struct static_call_site));
 
/* populate reloc for 'addr' */
-   reloc = malloc(sizeof(*reloc));
-
-   if (!reloc) {
-   perror("malloc");
-   return -1;
-   }
-   memset(reloc, 0, sizeof(*reloc));
-
-   insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc);
-   if (!reloc->sym) {
-   WARN_FUNC("static call tramp: missing containing 
symbol",
- insn->sec, insn->offset);
+   if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(struct static_call_site),
+ R_X86_64_PC32,
+ insn->sec, insn->offset))
return -1;
-   }
-
-   reloc->type = R_X86_64_PC32;
-   reloc->offset = idx * sizeof(struct static_call_site);
-   reloc->sec = reloc_sec;
-   elf_add_reloc(file->elf, reloc);
 
/* find key symbol */
key_name = strdup(insn->call_dest->name);
@@ -526,18 +510,11 @@ static int create_static_call_sections(struct 
objtool_file *file)
free(key_name);
 
/* populate reloc for 'key' */
-   reloc = malloc(sizeof(*reloc));
-   if (!reloc) {
-   perror("malloc");
+   if (elf_add_reloc(file->elf, sec,
+ idx * sizeof(struct static_call_site) + 4,
+ R_X86_64_PC32, key_sym,
+				  is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
return -1;
-   }
-   memset(reloc, 0, sizeof(*reloc));
-   reloc->sym = key_sym;
-		reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
-   reloc->type = R_X86_64_PC32;
-   reloc->offset = idx * sizeof(struct static_call_site) + 4;
-   reloc->sec = reloc_sec;
-   elf_add_reloc(file->elf, reloc);
 
idx++;
}
@@ -547,8 +524,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
 
 static int create_mcount_loc_sections(struct objtool_file *file)
 {
-   struct section *sec, *reloc_sec;
-   struct reloc *reloc;
+   struct section *sec;
unsigned long *loc;
struct instruction *insn;
int idx;
@@ -571,8 +547,7 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-   if (!reloc_sec)
+   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
return -1;
 
idx = 0;
@@ -581,32 +556,11 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
loc = (unsigned long *)sec->data->d_buf + idx;
memset(loc, 0, sizeof(unsigned 

[tip: x86/core] objtool: Extract elf_strtab_concat()

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 417a4dc91e559f92404c2544f785b02ce75784c3
Gitweb:
https://git.kernel.org/tip/417a4dc91e559f92404c2544f785b02ce75784c3
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:09 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:44:56 +02:00

objtool: Extract elf_strtab_concat()

Create a common helper to append strings to a strtab.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.941474...@infradead.org
---
 tools/objtool/elf.c | 60 +++-
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 7b65ae3..c278a04 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -673,13 +673,48 @@ err:
return NULL;
 }
 
+static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+{
+   Elf_Data *data;
+   Elf_Scn *s;
+   int len;
+
+   if (!strtab)
+   strtab = find_section_by_name(elf, ".strtab");
+   if (!strtab) {
+   WARN("can't find .strtab section");
+   return -1;
+   }
+
+   s = elf_getscn(elf->elf, strtab->idx);
+   if (!s) {
+   WARN_ELF("elf_getscn");
+   return -1;
+   }
+
+   data = elf_newdata(s);
+   if (!data) {
+   WARN_ELF("elf_newdata");
+   return -1;
+   }
+
+   data->d_buf = str;
+   data->d_size = strlen(str) + 1;
+   data->d_align = 1;
+
+   len = strtab->len;
+   strtab->len += data->d_size;
+   strtab->changed = true;
+
+   return len;
+}
+
 struct section *elf_create_section(struct elf *elf, const char *name,
 				   unsigned int sh_flags, size_t entsize, int nr)
 {
struct section *sec, *shstrtab;
size_t size = entsize * nr;
Elf_Scn *s;
-   Elf_Data *data;
 
sec = malloc(sizeof(*sec));
if (!sec) {
@@ -736,7 +771,6 @@ struct section *elf_create_section(struct elf *elf, const 
char *name,
sec->sh.sh_addralign = 1;
sec->sh.sh_flags = SHF_ALLOC | sh_flags;
 
-
/* Add section name to .shstrtab (or .strtab for Clang) */
shstrtab = find_section_by_name(elf, ".shstrtab");
if (!shstrtab)
@@ -745,27 +779,9 @@ struct section *elf_create_section(struct elf *elf, const 
char *name,
WARN("can't find .shstrtab or .strtab section");
return NULL;
}
-
-   s = elf_getscn(elf->elf, shstrtab->idx);
-   if (!s) {
-   WARN_ELF("elf_getscn");
+   sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
+   if (sec->sh.sh_name == -1)
return NULL;
-   }
-
-   data = elf_newdata(s);
-   if (!data) {
-   WARN_ELF("elf_newdata");
-   return NULL;
-   }
-
-   data->d_buf = sec->name;
-   data->d_size = strlen(name) + 1;
-   data->d_align = 1;
-
-   sec->sh.sh_name = shstrtab->len;
-
-   shstrtab->len += strlen(name) + 1;
-   shstrtab->changed = true;
 
 	list_add_tail(&sec->list, &elf->sections);
 	elf_hash_add(elf->section_hash, &sec->hash, sec->idx);


[tip: x86/core] objtool: Extract elf_symbol_add()

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 9a7827b7789c630c1efdb121daa42c6e77dce97f
Gitweb:
https://git.kernel.org/tip/9a7827b7789c630c1efdb121daa42c6e77dce97f
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:10 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:45:01 +02:00

objtool: Extract elf_symbol_add()

Create a common helper to add symbols.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.003468...@infradead.org
---
 tools/objtool/elf.c | 56 
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c278a04..8457218 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -290,12 +290,39 @@ static int read_sections(struct elf *elf)
return 0;
 }
 
+static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+{
+   struct list_head *entry;
+   struct rb_node *pnode;
+
+   sym->type = GELF_ST_TYPE(sym->sym.st_info);
+   sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+   sym->offset = sym->sym.st_value;
+   sym->len = sym->sym.st_size;
+
+	rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
+	pnode = rb_prev(&sym->node);
+	if (pnode)
+		entry = &rb_entry(pnode, struct symbol, node)->list;
+	else
+		entry = &sym->sec->symbol_list;
+	list_add(&sym->list, entry);
+	elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+	elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+
+   /*
+* Don't store empty STT_NOTYPE symbols in the rbtree.  They
+* can exist within a function, confusing the sorting.
+*/
+   if (!sym->len)
+		rb_erase(&sym->node, &sym->sec->symbol_tree);
+}
+
 static int read_symbols(struct elf *elf)
 {
struct section *symtab, *symtab_shndx, *sec;
struct symbol *sym, *pfunc;
-   struct list_head *entry;
-   struct rb_node *pnode;
int symbols_nr, i;
char *coldstr;
Elf_Data *shndx_data = NULL;
@@ -340,9 +367,6 @@ static int read_symbols(struct elf *elf)
goto err;
}
 
-   sym->type = GELF_ST_TYPE(sym->sym.st_info);
-   sym->bind = GELF_ST_BIND(sym->sym.st_info);
-
if ((sym->sym.st_shndx > SHN_UNDEF &&
 sym->sym.st_shndx < SHN_LORESERVE) ||
(shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
@@ -355,32 +379,14 @@ static int read_symbols(struct elf *elf)
 sym->name);
goto err;
}
-   if (sym->type == STT_SECTION) {
+   if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
sym->name = sym->sec->name;
sym->sec->sym = sym;
}
} else
sym->sec = find_section_by_index(elf, 0);
 
-   sym->offset = sym->sym.st_value;
-   sym->len = sym->sym.st_size;
-
-		rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
-		pnode = rb_prev(&sym->node);
-		if (pnode)
-			entry = &rb_entry(pnode, struct symbol, node)->list;
-		else
-			entry = &sym->sec->symbol_list;
-		list_add(&sym->list, entry);
-		elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
-		elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
-
-   /*
-* Don't store empty STT_NOTYPE symbols in the rbtree.  They
-* can exist within a function, confusing the sorting.
-*/
-   if (!sym->len)
-			rb_erase(&sym->node, &sym->sec->symbol_tree);
+   elf_add_symbol(elf, sym);
}
 
if (stats)


[tip: x86/core] objtool/x86: Rewrite retpoline thunk calls

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 9bc0bb50727c8ac69fbb33fb937431cf3518ff37
Gitweb:
https://git.kernel.org/tip/9bc0bb50727c8ac69fbb33fb937431cf3518ff37
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:15 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:47:28 +02:00

objtool/x86: Rewrite retpoline thunk calls

When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
indirect call, have objtool rewrite it to:

ALTERNATIVE "call __x86_indirect_thunk_\reg",
"call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)

Additionally, in order to not emit endless identical
.altinst_replacement chunks, use a global symbol for them, see
__x86_indirect_alt_*.

This also avoids objtool having to do code generation.
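
For a 5-byte compiler-emitted call through %rax this yields, roughly (an
illustrative byte-level sketch; other registers are analogous):

	default bytes:           e8 xx xx xx xx    call __x86_indirect_thunk_rax
	!X86_FEATURE_RETPOLINE:  ff d0 90 90 90    call *%rax (plus 3 NOP bytes)

The replacement bytes come from the global __x86_indirect_alt_call_rax
symbol rather than from per-site .altinstr_replacement chunks emitted by
objtool.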

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.320177...@infradead.org
---
 arch/x86/include/asm/asm-prototypes.h |  12 ++-
 arch/x86/lib/retpoline.S  |  41 -
 tools/objtool/arch/x86/decode.c   | 117 +-
 3 files changed, 167 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/asm-prototypes.h 
b/arch/x86/include/asm/asm-prototypes.h
index 0545b07..4cb726c 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
 
 #ifdef CONFIG_RETPOLINE
 
-#define DECL_INDIRECT_THUNK(reg) \
+#undef GEN
+#define GEN(reg) \
 	extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) \
+	extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
 
 #undef GEN
-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#define GEN(reg) \
+	extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
 #include <asm/GEN-for-each-reg.h>
 
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index d2c0d14..4d32cb0 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,6 +10,8 @@
 #include 
 #include 
 
+   .section .text.__x86.indirect_thunk
+
 .macro RETPOLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call.Ldo_rop_\@
@@ -25,9 +27,9 @@
 .endm
 
 .macro THUNK reg
-   .section .text.__x86.indirect_thunk
 
.align 32
+
 SYM_FUNC_START(__x86_indirect_thunk_\reg)
 
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 .endm
 
 /*
+ * This generates .altinstr_replacement symbols for use by objtool. They,
+ * however, must not actually live in .altinstr_replacement since that will be
+ * discarded after init, but module alternatives will also reference these
+ * symbols.
+ *
+ * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
+ */
+.macro ALT_THUNK reg
+
+   .align 1
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+   ANNOTATE_RETPOLINE_SAFE
+1: call*%\reg
+2: .skip   5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+   ANNOTATE_RETPOLINE_SAFE
+1: jmp *%\reg
+2: .skip   5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+
+.endm
+
+/*
  * Despite being an assembler file we can't just use .irp here
  * because __KSYM_DEPS__ only uses the C preprocessor and would
  * only see one instance of "__x86_indirect_thunk_\reg" rather
@@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include <asm/GEN-for-each-reg.h>
 
+#undef GEN
+#define GEN(reg) ALT_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+#include <asm/GEN-for-each-reg.h>
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 782894e..7e8b5be 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -613,6 +614,122 @@ const char *arch_nop_insn(int len)
return nops[len-1];
 }
 
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV  (1 << 15)
+#define ALT_NOT(feat)  ((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+   s32 instr_offset;   /* original instruction */
+   s32 repl_offset;/* offset to replacement instruction */
+   u16 cpuid;  /* cpuid bit set for replacement */
+   u8  instrlen;   /* length of original instruction */
+   u8  replacementlen; /* length of new instruction */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+  struct instruction *orig, struct symbol *sym,
+  

[tip: x86/core] objtool: Keep track of retpoline call sites

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 43d5430ad74ef5156353af7aec352426ec7a8e57
Gitweb:
https://git.kernel.org/tip/43d5430ad74ef5156353af7aec352426ec7a8e57
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:12 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:45:27 +02:00

objtool: Keep track of retpoline call sites

Provide infrastructure for architectures to rewrite/augment compiler
generated retpoline calls. Similar to what we do for static_call()s,
keep track of the instructions that are retpoline calls.

Use the same list_head, since a retpoline call cannot also be a
static_call.
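
A sketch of the resulting sharing (field and list names as used in the
diff below; unrelated members elided):

	struct instruction {
		/* ... */
		struct list_head call_node;	/* on file->retpoline_call_list
						 * or file->static_call_list,
						 * never both */
		/* ... */
	};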

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.130805...@infradead.org
---
 tools/objtool/check.c   | 34 
 tools/objtool/include/objtool/arch.h|  2 +-
 tools/objtool/include/objtool/check.h   |  2 +-
 tools/objtool/include/objtool/objtool.h |  1 +-
 tools/objtool/objtool.c |  1 +-
 5 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 600fa67..77074db 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -451,7 +451,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
return 0;
 
idx = 0;
-	list_for_each_entry(insn, &file->static_call_list, static_call_node)
+	list_for_each_entry(insn, &file->static_call_list, call_node)
idx++;
 
sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
@@ -460,7 +460,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
return -1;
 
idx = 0;
-	list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+	list_for_each_entry(insn, &file->static_call_list, call_node) {
 
site = (struct static_call_site *)sec->data->d_buf + idx;
memset(site, 0, sizeof(struct static_call_site));
@@ -829,13 +829,16 @@ static int add_jump_destinations(struct objtool_file 
*file)
else
insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
 
+			list_add_tail(&insn->call_node,
+				      &file->retpoline_call_list);
+
insn->retpoline_safe = true;
continue;
} else if (insn->func) {
/* internal or external sibling call (with reloc) */
insn->call_dest = reloc->sym;
if (insn->call_dest->static_call_tramp) {
-				list_add_tail(&insn->static_call_node,
+				list_add_tail(&insn->call_node,
 					      &file->static_call_list);
}
continue;
@@ -897,7 +900,7 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call (without reloc) */
insn->call_dest = insn->jump_dest->func;
if (insn->call_dest->static_call_tramp) {
-				list_add_tail(&insn->static_call_node,
+				list_add_tail(&insn->call_node,
 					      &file->static_call_list);
}
}
@@ -981,6 +984,9 @@ static int add_call_destinations(struct objtool_file *file)
insn->type = INSN_CALL_DYNAMIC;
insn->retpoline_safe = true;
 
+			list_add_tail(&insn->call_node,
+				      &file->retpoline_call_list);
+
remove_insn_ops(insn);
continue;
 
@@ -988,7 +994,7 @@ static int add_call_destinations(struct objtool_file *file)
insn->call_dest = reloc->sym;
 
if (insn->call_dest && insn->call_dest->static_call_tramp) {
-			list_add_tail(&insn->static_call_node,
+			list_add_tail(&insn->call_node,
 				      &file->static_call_list);
}
 
@@ -1714,6 +1720,11 @@ static void mark_rodata(struct objtool_file *file)
file->rodata = found;
 }
 
+__weak int arch_rewrite_retpolines(struct objtool_file *file)
+{
+   return 0;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
int ret;
@@ -1742,6 +1753,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
 
+   /*
+* Must be before add_special_section_alts() as that depends on
+* jump_dest being set.
+*/
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -1778,6 

[tip: x86/core] objtool: Cache instruction relocs

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 7bd2a600f3e9d27286bbf23c83d599e9cc7cf245
Gitweb:
https://git.kernel.org/tip/7bd2a600f3e9d27286bbf23c83d599e9cc7cf245
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:13 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:46:15 +02:00

objtool: Cache instruction relocs

Track the reloc of instructions in the new instruction->reloc field
to avoid having to look them up again later.

( Technically x86 instructions can have two relocations, but not jumps
  and calls, for which we're using this. )

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.195441...@infradead.org
---
 tools/objtool/check.c | 28 --
 tools/objtool/include/objtool/check.h |  1 +-
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 77074db..1f4154f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -797,6 +797,25 @@ __weak bool arch_is_retpoline(struct symbol *sym)
return false;
 }
 
+#define NEGATIVE_RELOC ((void *)-1L)
+
+static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+{
+   if (insn->reloc == NEGATIVE_RELOC)
+   return NULL;
+
+   if (!insn->reloc) {
+   insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+  insn->offset, insn->len);
+   if (!insn->reloc) {
+   insn->reloc = NEGATIVE_RELOC;
+   return NULL;
+   }
+   }
+
+   return insn->reloc;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -811,8 +830,7 @@ static int add_jump_destinations(struct objtool_file *file)
if (!is_static_jump(insn))
continue;
 
-   reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-insn->offset, insn->len);
+   reloc = insn_reloc(file, insn);
if (!reloc) {
dest_sec = insn->sec;
dest_off = arch_jump_destination(insn);
@@ -944,8 +962,7 @@ static int add_call_destinations(struct objtool_file *file)
if (insn->type != INSN_CALL)
continue;
 
-   reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-  insn->offset, insn->len);
+   reloc = insn_reloc(file, insn);
if (!reloc) {
dest_off = arch_jump_destination(insn);
insn->call_dest = find_call_destination(insn->sec, 
dest_off);
@@ -1144,8 +1161,7 @@ static int handle_group_alt(struct objtool_file *file,
 * alternatives code can adjust the relative offsets
 * accordingly.
 */
-   alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-  insn->offset, insn->len);
+   alt_reloc = insn_reloc(file, insn);
if (alt_reloc &&
 		    !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
 
diff --git a/tools/objtool/include/objtool/check.h 
b/tools/objtool/include/objtool/check.h
index e5528ce..56d50bc 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -56,6 +56,7 @@ struct instruction {
struct instruction *jump_dest;
struct instruction *first_jump_src;
struct reloc *jump_table;
+   struct reloc *reloc;
struct list_head alts;
struct symbol *func;
struct list_head stack_ops;


[tip: x86/core] objtool: Add elf_create_undef_symbol()

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 2f2f7e47f0525cbaad5dd9675fd9d8aa8da12046
Gitweb:
https://git.kernel.org/tip/2f2f7e47f0525cbaad5dd9675fd9d8aa8da12046
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:11 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:45:05 +02:00

objtool: Add elf_create_undef_symbol()

Allow objtool to create undefined symbols; this allows creating
relocations to symbols not currently in the symbol table.
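
A short usage sketch (error handling condensed; the symbol name is the
kind of alternative symbol the retpoline rewrite elsewhere in this series
relies on):

	struct symbol *sym;

	sym = elf_create_undef_symbol(file->elf, "__x86_indirect_alt_call_rax");
	if (!sym)
		return -1;

	/* relocations may now reference it; the final link resolves it */
	elf_add_reloc(file->elf, sec, offset, R_X86_64_PC32, sym, 0);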

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.064743...@infradead.org
---
 tools/objtool/elf.c | 60 -
 tools/objtool/include/objtool/elf.h |  1 +-
 2 files changed, 61 insertions(+)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 8457218..d08f5f3 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -715,6 +715,66 @@ static int elf_add_string(struct elf *elf, struct section 
*strtab, char *str)
return len;
 }
 
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+{
+   struct section *symtab;
+   struct symbol *sym;
+   Elf_Data *data;
+   Elf_Scn *s;
+
+   sym = malloc(sizeof(*sym));
+   if (!sym) {
+   perror("malloc");
+   return NULL;
+   }
+   memset(sym, 0, sizeof(*sym));
+
+   sym->name = strdup(name);
+
+   sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+   if (sym->sym.st_name == -1)
+   return NULL;
+
+   sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
+   // st_other 0
+   // st_shndx 0
+   // st_value 0
+   // st_size 0
+
+   symtab = find_section_by_name(elf, ".symtab");
+   if (!symtab) {
+   WARN("can't find .symtab");
+   return NULL;
+   }
+
+   s = elf_getscn(elf->elf, symtab->idx);
+   if (!s) {
+   WARN_ELF("elf_getscn");
+   return NULL;
+   }
+
+   data = elf_newdata(s);
+   if (!data) {
+   WARN_ELF("elf_newdata");
+   return NULL;
+   }
+
+	data->d_buf = &sym->sym;
+   data->d_size = sizeof(sym->sym);
+   data->d_align = 1;
+
+   sym->idx = symtab->len / sizeof(sym->sym);
+
+   symtab->len += data->d_size;
+   symtab->changed = true;
+
+   sym->sec = find_section_by_index(elf, 0);
+
+   elf_add_symbol(elf, sym);
+
+   return sym;
+}
+
 struct section *elf_create_section(struct elf *elf, const char *name,
 				   unsigned int sh_flags, size_t entsize, int nr)
 {
diff --git a/tools/objtool/include/objtool/elf.h 
b/tools/objtool/include/objtool/elf.h
index 463f329..45e5ede 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -133,6 +133,7 @@ int elf_write_insn(struct elf *elf, struct section *sec,
   unsigned long offset, unsigned int len,
   const char *insn);
 int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
 int elf_write(struct elf *elf);
 void elf_close(struct elf *elf);
 


[tip: x86/core] objtool: Skip magical retpoline .altinstr_replacement

2021-04-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 50e7b4a1a1b264fc7df0698f2defb93cadf19a7b
Gitweb:
https://git.kernel.org/tip/50e7b4a1a1b264fc7df0698f2defb93cadf19a7b
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:14 +01:00
Committer: Ingo Molnar 
CommitterDate: Fri, 02 Apr 2021 12:46:57 +02:00

objtool: Skip magical retpoline .altinstr_replacement

When the .altinstr_replacement is a retpoline, skip the alternative.
We already special case retpolines anyway.
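
The skip is signalled through get_alt_entry()'s return value: > 0 now
means "valid entry, but don't create an alternative for it", < 0 stays an
error, and 0 adds the alternative, as the updated caller shows:

	ret = get_alt_entry(elf, entry, sec, idx, alt);
	if (ret > 0)		/* e.g. a retpoline replacement: skip */
		continue;
	if (ret < 0)
		return ret;

	list_add_tail(&alt->list, alts);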

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Signed-off-by: Ingo Molnar 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.259429...@infradead.org
---
 tools/objtool/special.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 2c7fbda..07b21cf 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf, struct 
special_entry *entry,
return -1;
}
 
+   /*
+	 * Skip retpoline .altinstr_replacement... we already rewrite the
+* instructions for retpolines anyway, see arch_is_retpoline()
+* usage in add_{call,jump}_destinations().
+*/
+   if (arch_is_retpoline(new_reloc->sym))
+   return 1;
+
alt->new_sec = new_reloc->sym->sec;
alt->new_off = (unsigned int)new_reloc->addend;
 
@@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, struct list_head 
*alts)
memset(alt, 0, sizeof(*alt));
 
ret = get_alt_entry(elf, entry, sec, idx, alt);
-   if (ret)
+   if (ret > 0)
+   continue;
+   if (ret < 0)
return ret;
 
list_add_tail(>list, alts);


[tip: x86/core] objtool: Fix static_call list generation

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: b62b63571e4be0ce31984ce83b04853f2cba678b
Gitweb:
https://git.kernel.org/tip/b62b63571e4be0ce31984ce83b04853f2cba678b
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:05 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 11:43:16 +02:00

objtool: Fix static_call list generation

Currently, objtool generates tail call entries in add_jump_destination()
but waits until validate_branch() to generate the regular call entries.
Move these to add_call_destination() for consistency.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.691529...@infradead.org
---
 tools/objtool/check.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 6fbc001..8618d03 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1045,6 +1045,11 @@ static int add_call_destinations(struct objtool_file 
*file)
} else
insn->call_dest = reloc->sym;
 
+   if (insn->call_dest && insn->call_dest->static_call_tramp) {
+			list_add_tail(&insn->static_call_node,
+				      &file->static_call_list);
+   }
+
/*
 * Many compilers cannot disable KCOV with a function attribute
 		 * so they need a little help, NOP out any KCOV calls from noinstr
@@ -1788,6 +1793,9 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
 
+   /*
+* Must be before add_{jump_call}_destination.
+*/
ret = read_static_call_tramps(file);
if (ret)
return ret;
@@ -1800,6 +1808,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
 
+   /*
+* Must be before add_call_destination(); it changes INSN_CALL to
+* INSN_JUMP.
+*/
ret = read_intra_function_calls(file);
if (ret)
return ret;
@@ -2762,11 +2774,6 @@ static int validate_branch(struct objtool_file *file, 
struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
 
-		if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
-			list_add_tail(&insn->static_call_node,
-				      &file->static_call_list);
-   }
-
break;
 
case INSN_JUMP_CONDITIONAL:


[tip: x86/core] objtool: Cache instruction relocs

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 4ecdc0265dc911adba0772fd6e816d48da678fe7
Gitweb:
https://git.kernel.org/tip/4ecdc0265dc911adba0772fd6e816d48da678fe7
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:13 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 13:25:38 +02:00

objtool: Cache instruction relocs

Track the reloc of instructions to avoid having to look them up again
later.

(Technically x86 instructions can have two relocations, but not jumps
and calls, for which we're using this.)

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.195441...@infradead.org
---
 tools/objtool/check.c | 28 --
 tools/objtool/include/objtool/check.h |  1 +-
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 77074db..1f4154f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -797,6 +797,25 @@ __weak bool arch_is_retpoline(struct symbol *sym)
return false;
 }
 
+#define NEGATIVE_RELOC ((void *)-1L)
+
+static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+{
+   if (insn->reloc == NEGATIVE_RELOC)
+   return NULL;
+
+   if (!insn->reloc) {
+   insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+  insn->offset, insn->len);
+   if (!insn->reloc) {
+   insn->reloc = NEGATIVE_RELOC;
+   return NULL;
+   }
+   }
+
+   return insn->reloc;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -811,8 +830,7 @@ static int add_jump_destinations(struct objtool_file *file)
if (!is_static_jump(insn))
continue;
 
-   reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-insn->offset, insn->len);
+   reloc = insn_reloc(file, insn);
if (!reloc) {
dest_sec = insn->sec;
dest_off = arch_jump_destination(insn);
@@ -944,8 +962,7 @@ static int add_call_destinations(struct objtool_file *file)
if (insn->type != INSN_CALL)
continue;
 
-   reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-  insn->offset, insn->len);
+   reloc = insn_reloc(file, insn);
if (!reloc) {
dest_off = arch_jump_destination(insn);
insn->call_dest = find_call_destination(insn->sec, 
dest_off);
@@ -1144,8 +1161,7 @@ static int handle_group_alt(struct objtool_file *file,
 * alternatives code can adjust the relative offsets
 * accordingly.
 */
-   alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-  insn->offset, insn->len);
+   alt_reloc = insn_reloc(file, insn);
if (alt_reloc &&
!arch_support_alt_relocation(special_alt, insn, alt_reloc)) 
{
 
diff --git a/tools/objtool/include/objtool/check.h 
b/tools/objtool/include/objtool/check.h
index e5528ce..56d50bc 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -56,6 +56,7 @@ struct instruction {
struct instruction *jump_dest;
struct instruction *first_jump_src;
struct reloc *jump_table;
+   struct reloc *reloc;
struct list_head alts;
struct symbol *func;
struct list_head stack_ops;


[tip: x86/core] objtool: Rework rebuild_reloc logic

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 98ce4d014ad4c1c4afcc427fc3f0002674315cb9
Gitweb:
https://git.kernel.org/tip/98ce4d014ad4c1c4afcc427fc3f0002674315cb9
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:06 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 12:51:35 +02:00

objtool: Rework rebuild_reloc logic

Instead of manually calling elf_rebuild_reloc_section() on sections
we've called elf_add_reloc() on, have elf_write() DTRT.

This makes it easier to add random relocations in places without
carefully tracking when we're done and need to flush what section.
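
The burden thus moves from every call site to elf_write(). A condensed
before/after sketch of a typical caller (distilled from the diff below):

  /* before: add relocs, then remember to flush the section */
  elf_add_reloc(file->elf, reloc);
  ...
  if (elf_rebuild_reloc_section(file->elf, reloc_sec))
  	return -1;

  /* after: elf_add_reloc() marks the section changed and
   * elf_write() rebuilds every changed reloc section itself */
  elf_add_reloc(file->elf, reloc);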

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.754213...@infradead.org
---
 tools/objtool/check.c   |  6 --
 tools/objtool/elf.c | 20 ++--
 tools/objtool/include/objtool/elf.h |  1 -
 tools/objtool/orc_gen.c |  3 ---
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8618d03..1d0415b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -542,9 +542,6 @@ static int create_static_call_sections(struct objtool_file 
*file)
idx++;
}
 
-   if (elf_rebuild_reloc_section(file->elf, reloc_sec))
-   return -1;
-
return 0;
 }
 
@@ -614,9 +611,6 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
idx++;
}
 
-   if (elf_rebuild_reloc_section(file->elf, reloc_sec))
-   return -1;
-
return 0;
 }
 
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 93fa833..374813e 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -479,6 +479,8 @@ void elf_add_reloc(struct elf *elf, struct reloc *reloc)
 
list_add_tail(&reloc->list, &sec->reloc_list);
elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
+   sec->changed = true;
 }
 
 static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, 
unsigned int *symndx)
@@ -558,7 +560,9 @@ static int read_relocs(struct elf *elf)
return -1;
}
 
-   elf_add_reloc(elf, reloc);
+   list_add_tail(&reloc->list, &sec->reloc_list);
+   elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
nr_reloc++;
}
max_reloc = max(max_reloc, nr_reloc);
@@ -873,14 +877,11 @@ static int elf_rebuild_rela_reloc_section(struct section 
*sec, int nr)
return 0;
 }
 
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
 {
struct reloc *reloc;
int nr;
 
-   sec->changed = true;
-   elf->changed = true;
-
nr = 0;
list_for_each_entry(reloc, &sec->reloc_list, list)
nr++;
@@ -944,9 +945,15 @@ int elf_write(struct elf *elf)
struct section *sec;
Elf_Scn *s;
 
-   /* Update section headers for changed sections: */
+   /* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
if (sec->changed) {
+   if (sec->base &&
+   elf_rebuild_reloc_section(elf, sec)) {
+   WARN("elf_rebuild_reloc_section");
+   return -1;
+   }
+
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
WARN_ELF("elf_getscn");
@@ -958,6 +965,7 @@ int elf_write(struct elf *elf)
}
 
sec->changed = false;
+   elf->changed = true;
}
}
 
diff --git a/tools/objtool/include/objtool/elf.h 
b/tools/objtool/include/objtool/elf.h
index e6890cc..fc576ed 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -142,7 +142,6 @@ struct reloc *find_reloc_by_dest_range(const struct elf 
*elf, struct section *se
 struct symbol *find_func_containing(struct section *sec, unsigned long offset);
 void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
  struct reloc *reloc);
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
 
 #define for_each_sec(file, sec)
\
list_for_each_entry(sec, &file->elf->sections, list)
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 738aa50..f534708 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -254,8 +254,5 @@ int orc_create(struct objtool_file *file)
return -1;
}
 
-   if 

[tip: x86/core] x86: Add insn_decode_kernel()

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 52fa82c21f64e900a72437269a5cc9e0034b424e
Gitweb:
https://git.kernel.org/tip/52fa82c21f64e900a72437269a5cc9e0034b424e
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:00 +01:00
Committer: Borislav Petkov 
CommitterDate: Wed, 31 Mar 2021 16:20:22 +02:00

x86: Add insn_decode_kernel()

Add a helper to decode kernel instructions; there's no point in
endlessly repeating those last two arguments.
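
A minimal usage sketch ('addr' is a hypothetical kernel text address):

  struct insn insn;
  int ret;

  /* expands to insn_decode(&insn, addr, MAX_INSN_SIZE, INSN_MODE_KERN) */
  ret = insn_decode_kernel(&insn, addr);
  if (ret < 0)
  	return ret;	/* failed to decode */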

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20210326151259.379242...@infradead.org
---
 arch/x86/include/asm/insn.h| 2 ++
 arch/x86/kernel/alternative.c  | 2 +-
 arch/x86/kernel/cpu/mce/severity.c | 2 +-
 arch/x86/kernel/kprobes/core.c | 4 ++--
 arch/x86/kernel/kprobes/opt.c  | 2 +-
 arch/x86/kernel/traps.c| 2 +-
 tools/arch/x86/include/asm/insn.h  | 2 ++
 7 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index f03b6ca..05a6ab9 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -150,6 +150,8 @@ enum insn_mode {
 
 extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum 
insn_mode m);
 
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
+
 /* Attribute will be determined after getting ModRM (for opcode groups) */
 static inline void insn_get_attribute(struct insn *insn)
 {
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ce28c5c..ff359b3 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1280,7 +1280,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, 
void *addr,
if (!emulate)
emulate = opcode;
 
-   ret = insn_decode(&insn, emulate, MAX_INSN_SIZE, INSN_MODE_KERN);
+   ret = insn_decode_kernel(&insn, emulate);
 
BUG_ON(ret < 0);
BUG_ON(len != insn.length);
diff --git a/arch/x86/kernel/cpu/mce/severity.c 
b/arch/x86/kernel/cpu/mce/severity.c
index a2136ce..abdd2e4 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -225,7 +225,7 @@ static bool is_copy_from_user(struct pt_regs *regs)
if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
return false;
 
-   ret = insn_decode(&insn, insn_buf, MAX_INSN_SIZE, INSN_MODE_KERN);
+   ret = insn_decode_kernel(&insn, insn_buf);
if (ret < 0)
return false;
 
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index dd09021..1319ff4 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -285,7 +285,7 @@ static int can_probe(unsigned long paddr)
if (!__addr)
return 0;
 
-   ret = insn_decode(&insn, (void *)__addr, MAX_INSN_SIZE, INSN_MODE_KERN);
+   ret = insn_decode_kernel(&insn, (void *)__addr);
if (ret < 0)
return 0;
 
@@ -322,7 +322,7 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct 
insn *insn)
MAX_INSN_SIZE))
return 0;
 
-   ret = insn_decode(insn, dest, MAX_INSN_SIZE, INSN_MODE_KERN);
+   ret = insn_decode_kernel(insn, dest);
if (ret < 0)
return 0;
 
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4299fc8..71425eb 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -324,7 +324,7 @@ static int can_optimize(unsigned long paddr)
if (!recovered_insn)
return 0;
 
-   ret = insn_decode(&insn, (void *)recovered_insn, MAX_INSN_SIZE, INSN_MODE_KERN);
+   ret = insn_decode_kernel(&insn, (void *)recovered_insn);
if (ret < 0)
return 0;
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a5d2540..034f27f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -504,7 +504,7 @@ static enum kernel_gp_hint get_kernel_gp_address(struct 
pt_regs *regs,
MAX_INSN_SIZE))
return GP_NO_HINT;
 
-   ret = insn_decode(&insn, insn_buf, MAX_INSN_SIZE, INSN_MODE_KERN);
+   ret = insn_decode_kernel(&insn, insn_buf);
if (ret < 0)
return GP_NO_HINT;
 
diff --git a/tools/arch/x86/include/asm/insn.h 
b/tools/arch/x86/include/asm/insn.h
index c9f3eee..dc632b4 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -150,6 +150,8 @@ enum insn_mode {
 
 extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum 
insn_mode m);
 
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
+
 /* Attribute will be determined after getting ModRM (for opcode 

[tip: x86/core] x86/alternatives: Optimize optimize_nops()

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: b4da5166b084f3fac01d68e0e67cbf3bf78a3e12
Gitweb:
https://git.kernel.org/tip/b4da5166b084f3fac01d68e0e67cbf3bf78a3e12
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:01 +01:00
Committer: Borislav Petkov 
CommitterDate: Wed, 31 Mar 2021 20:30:04 +02:00

x86/alternatives: Optimize optimize_nops()

Currently, optimize_nops() scans to see if the alternative starts with
NOPs. However, the emit pattern is:

  141:  \oldinstr
  142:  .skip (len-(142b-141b)), 0x90

That is, when oldinstr is short, the tail is padded with NOPs. This case
never gets optimized.

Rewrite optimize_nops() to replace any trailing string of NOPs inside
the alternative to larger NOPs. Also run it irrespective of patching,
replacing NOPs in both the original and replaced code.

A direct consequence is that padlen becomes superfluous, so remove it.

 [ bp:
   - Adjust commit message
   - remove a stale comment about needing to pad
   - add a comment in optimize_nops()
   - exit early if the NOP verif. loop catches a mismatch - function
 should not add NOPs in that case
   - fix the "optimized NOPs" offsets output ]
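
Roughly, the rewritten function behaves like this sketch (simplified: the
real code decodes instructions rather than scanning raw bytes, and also
handles the instrumentation/patching details):

  static void optimize_nops(struct alt_instr *a, u8 *instr)
  {
  	int i = a->instrlen;

  	/* find the trailing run of single-byte NOPs (0x90) */
  	while (i > 0 && instr[i - 1] == 0x90)
  		i--;

  	if (i == a->instrlen)
  		return;			/* nothing to optimize */

  	/* rewrite the tail with the longest NOPs available */
  	add_nops(instr + i, a->instrlen - i);
  }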

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20210326151259.442992...@infradead.org
---
 arch/x86/include/asm/alternative.h| 17 +-
 arch/x86/kernel/alternative.c | 49 +++---
 tools/objtool/arch/x86/include/arch/special.h |  2 +-
 3 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h 
b/arch/x86/include/asm/alternative.h
index 17b3609..a3c2315 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,7 +65,6 @@ struct alt_instr {
u16 cpuid;  /* cpuid bit set for replacement */
u8  instrlen;   /* length of original instruction */
u8  replacementlen; /* length of new instruction */
-   u8  padlen; /* length of build-time padding */
 } __packed;
 
 /*
@@ -104,7 +103,6 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 
 #define alt_end_marker "663"
 #define alt_slen   "662b-661b"
-#define alt_pad_len    alt_end_marker"b-662b"
 #define alt_total_slen alt_end_marker"b-661b"
 #define alt_rlen(num)  e_replacement(num)"f-"b_replacement(num)"f"
 
@@ -151,8 +149,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
" .long " b_replacement(num)"f - .\n"   /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
" .byte " alt_total_slen "\n"   /* source len  */ \
-   " .byte " alt_rlen(num) "\n"/* replacement len */ \
-   " .byte " alt_pad_len "\n"  /* pad len */
+   " .byte " alt_rlen(num) "\n"/* replacement len */
 
 #define ALTINSTR_REPLACEMENT(newinstr, num)/* replacement */   
\
"# ALT: replacement " #num "\n" 
\
@@ -224,9 +221,6 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
  * Peculiarities:
  * No memory clobber here.
  * Argument numbers start with 1.
- * Best is to use constraints that are fixed size (like (%1) ... "r")
- * If you use variable sized constraints like "m" or "g" in the
- * replacement make sure to pad to the worst case length.
  * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)   \
@@ -315,13 +309,12 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
  * enough information for the alternatives patching code to patch an
  * instruction. See apply_alternatives().
  */
-.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+.macro altinstruction_entry orig alt feature orig_len alt_len
.long \orig - .
.long \alt - .
.word \feature
.byte \orig_len
.byte \alt_len
-   .byte \pad_len
 .endm
 
 /*
@@ -338,7 +331,7 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 142:
 
.pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+   altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f
.popsection
 
.pushsection .altinstr_replacement,"ax"
@@ -375,8 +368,8 @@ static inline int alternatives_text_reserved(void *start, 
void *end)
 142:
 
.pushsection .altinstructions,"a"
-   altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
-   altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+   altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+   altinstruction_entry 

[tip: x86/core] objtool: Correctly handle retpoline thunk calls

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: db9d1dd670d7f3f146c654f289f20968af6a12de
Gitweb:
https://git.kernel.org/tip/db9d1dd670d7f3f146c654f289f20968af6a12de
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:03 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 11:34:01 +02:00

objtool: Correctly handle retpoline thunk calls

Just like JMP handling, convert a direct CALL to a retpoline thunk
into a retpoline safe indirect CALL.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.567568...@infradead.org
---
 tools/objtool/check.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d45f018..519af4b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1025,6 +1025,18 @@ static int add_call_destinations(struct objtool_file 
*file)
  dest_off);
return -1;
}
+
+   } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+   /*
+* Retpoline calls are really dynamic calls in
+* disguise, so convert them accordingly.
+*/
+   insn->type = INSN_CALL_DYNAMIC;
+   insn->retpoline_safe = true;
+
+   remove_insn_ops(insn);
+   continue;
+
} else
insn->call_dest = reloc->sym;
 


[tip: x86/core] objtool: Extract elf_symbol_add()

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: d56a3568827ec4b8efcbcfc46fdc944995b6dcf1
Gitweb:
https://git.kernel.org/tip/d56a3568827ec4b8efcbcfc46fdc944995b6dcf1
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:10 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 13:08:52 +02:00

objtool: Extract elf_symbol_add()

Create a common helper to add symbols.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.003468...@infradead.org
---
 tools/objtool/elf.c | 56 
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index c278a04..8457218 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -290,12 +290,39 @@ static int read_sections(struct elf *elf)
return 0;
 }
 
+static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+{
+   struct list_head *entry;
+   struct rb_node *pnode;
+
+   sym->type = GELF_ST_TYPE(sym->sym.st_info);
+   sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+   sym->offset = sym->sym.st_value;
+   sym->len = sym->sym.st_size;
+
+   rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
+   pnode = rb_prev(&sym->node);
+   if (pnode)
+   entry = &rb_entry(pnode, struct symbol, node)->list;
+   else
+   entry = &sym->sec->symbol_list;
+   list_add(&sym->list, entry);
+   elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+   elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+
+   /*
+* Don't store empty STT_NOTYPE symbols in the rbtree.  They
+* can exist within a function, confusing the sorting.
+*/
+   if (!sym->len)
+   rb_erase(&sym->node, &sym->sec->symbol_tree);
+}
+
 static int read_symbols(struct elf *elf)
 {
struct section *symtab, *symtab_shndx, *sec;
struct symbol *sym, *pfunc;
-   struct list_head *entry;
-   struct rb_node *pnode;
int symbols_nr, i;
char *coldstr;
Elf_Data *shndx_data = NULL;
@@ -340,9 +367,6 @@ static int read_symbols(struct elf *elf)
goto err;
}
 
-   sym->type = GELF_ST_TYPE(sym->sym.st_info);
-   sym->bind = GELF_ST_BIND(sym->sym.st_info);
-
if ((sym->sym.st_shndx > SHN_UNDEF &&
 sym->sym.st_shndx < SHN_LORESERVE) ||
(shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
@@ -355,32 +379,14 @@ static int read_symbols(struct elf *elf)
 sym->name);
goto err;
}
-   if (sym->type == STT_SECTION) {
+   if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
sym->name = sym->sec->name;
sym->sec->sym = sym;
}
} else
sym->sec = find_section_by_index(elf, 0);
 
-   sym->offset = sym->sym.st_value;
-   sym->len = sym->sym.st_size;
-
-   rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
-   pnode = rb_prev(&sym->node);
-   if (pnode)
-   entry = &rb_entry(pnode, struct symbol, node)->list;
-   else
-   entry = &sym->sec->symbol_list;
-   list_add(&sym->list, entry);
-   elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
-   elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
-
-   /*
-* Don't store empty STT_NOTYPE symbols in the rbtree.  They
-* can exist within a function, confusing the sorting.
-*/
-   if (!sym->len)
-   rb_erase(&sym->node, &sym->sec->symbol_tree);
+   elf_add_symbol(elf, sym);
}
 
if (stats)


[tip: x86/core] objtool/x86: Rewrite retpoline thunk calls

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: f31390437ce984118215169d75570e365457ec23
Gitweb:
https://git.kernel.org/tip/f31390437ce984118215169d75570e365457ec23
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:15 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 14:30:45 +02:00

objtool/x86: Rewrite retpoline thunk calls

When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
indirect call, have objtool rewrite it to:

ALTERNATIVE "call __x86_indirect_thunk_\reg",
"call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)

Additionally, in order to not emit endless identical
.altinstr_replacement chunks, use a global symbol for them, see
__x86_indirect_alt_*.

This also avoids objtool having to do code generation.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.320177...@infradead.org
---
 arch/x86/include/asm/asm-prototypes.h |  12 ++-
 arch/x86/lib/retpoline.S  |  41 -
 tools/objtool/arch/x86/decode.c   | 117 +-
 3 files changed, 167 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/asm-prototypes.h 
b/arch/x86/include/asm/asm-prototypes.h
index 0545b07..4cb726c 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
 
 #ifdef CONFIG_RETPOLINE
 
-#define DECL_INDIRECT_THUNK(reg) \
+#undef GEN
+#define GEN(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+#include 
+
+#undef GEN
+#define GEN(reg) \
+   extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+#include 
 
 #undef GEN
-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#define GEN(reg) \
+   extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
 #include 
 
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index d2c0d14..4d32cb0 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,6 +10,8 @@
 #include 
 #include 
 
+   .section .text.__x86.indirect_thunk
+
 .macro RETPOLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call.Ldo_rop_\@
@@ -25,9 +27,9 @@
 .endm
 
 .macro THUNK reg
-   .section .text.__x86.indirect_thunk
 
.align 32
+
 SYM_FUNC_START(__x86_indirect_thunk_\reg)
 
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 .endm
 
 /*
+ * This generates .altinstr_replacement symbols for use by objtool. They,
+ * however, must not actually live in .altinstr_replacement since that will be
+ * discarded after init, but module alternatives will also reference these
+ * symbols.
+ *
+ * Their names match the "__x86_indirect_" prefix to mark them as retpolines.
+ */
+.macro ALT_THUNK reg
+
+   .align 1
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+   ANNOTATE_RETPOLINE_SAFE
+1: call    *%\reg
+2: .skip   5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+   ANNOTATE_RETPOLINE_SAFE
+1: jmp *%\reg
+2: .skip   5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+
+.endm
+
+/*
  * Despite being an assembler file we can't just use .irp here
  * because __KSYM_DEPS__ only uses the C preprocessor and would
  * only see one instance of "__x86_indirect_thunk_\reg" rather
@@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
 #define GEN(reg) EXPORT_THUNK(reg)
 #include 
 
+#undef GEN
+#define GEN(reg) ALT_THUNK reg
+#include 
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+#include 
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+#include 
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index e5fa3a5..44375fa 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -610,6 +611,122 @@ const char *arch_nop_insn(int len)
return nops[len-1];
 }
 
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV  (1 << 15)
+#define ALT_NOT(feat)  ((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+   s32 instr_offset;   /* original instruction */
+   s32 repl_offset;/* offset to replacement instruction */
+   u16 cpuid;  /* cpuid bit set for replacement */
+   u8  instrlen;   /* length of original instruction */
+   u8  replacementlen; /* length of new instruction */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+  struct instruction *orig, struct symbol *sym,
+  int 

[tip: x86/core] objtool: Extract elf_strtab_concat()

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 557c25be3588971caf21364b6fd240769e37c47c
Gitweb:
https://git.kernel.org/tip/557c25be3588971caf21364b6fd240769e37c47c
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:09 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 13:05:50 +02:00

objtool: Extract elf_strtab_concat()

Create a common helper to append strings to a strtab.
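
A usage sketch; the helper returns the appended string's offset into the
table, or -1 on error (a NULL table argument means "use .strtab"):

  int off;

  off = elf_add_string(elf, NULL, name);	/* 'name' is illustrative */
  if (off == -1)
  	return -1;

  sym->sym.st_name = off;	/* symbol names are strtab offsets */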

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.941474...@infradead.org
---
 tools/objtool/elf.c | 60 +++-
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 7b65ae3..c278a04 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -673,13 +673,48 @@ err:
return NULL;
 }
 
+static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+{
+   Elf_Data *data;
+   Elf_Scn *s;
+   int len;
+
+   if (!strtab)
+   strtab = find_section_by_name(elf, ".strtab");
+   if (!strtab) {
+   WARN("can't find .strtab section");
+   return -1;
+   }
+
+   s = elf_getscn(elf->elf, strtab->idx);
+   if (!s) {
+   WARN_ELF("elf_getscn");
+   return -1;
+   }
+
+   data = elf_newdata(s);
+   if (!data) {
+   WARN_ELF("elf_newdata");
+   return -1;
+   }
+
+   data->d_buf = str;
+   data->d_size = strlen(str) + 1;
+   data->d_align = 1;
+
+   len = strtab->len;
+   strtab->len += data->d_size;
+   strtab->changed = true;
+
+   return len;
+}
+
 struct section *elf_create_section(struct elf *elf, const char *name,
   unsigned int sh_flags, size_t entsize, int 
nr)
 {
struct section *sec, *shstrtab;
size_t size = entsize * nr;
Elf_Scn *s;
-   Elf_Data *data;
 
sec = malloc(sizeof(*sec));
if (!sec) {
@@ -736,7 +771,6 @@ struct section *elf_create_section(struct elf *elf, const 
char *name,
sec->sh.sh_addralign = 1;
sec->sh.sh_flags = SHF_ALLOC | sh_flags;
 
-
/* Add section name to .shstrtab (or .strtab for Clang) */
shstrtab = find_section_by_name(elf, ".shstrtab");
if (!shstrtab)
@@ -745,27 +779,9 @@ struct section *elf_create_section(struct elf *elf, const 
char *name,
WARN("can't find .shstrtab or .strtab section");
return NULL;
}
-
-   s = elf_getscn(elf->elf, shstrtab->idx);
-   if (!s) {
-   WARN_ELF("elf_getscn");
+   sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
+   if (sec->sh.sh_name == -1)
return NULL;
-   }
-
-   data = elf_newdata(s);
-   if (!data) {
-   WARN_ELF("elf_newdata");
-   return NULL;
-   }
-
-   data->d_buf = sec->name;
-   data->d_size = strlen(name) + 1;
-   data->d_align = 1;
-
-   sec->sh.sh_name = shstrtab->len;
-
-   shstrtab->len += strlen(name) + 1;
-   shstrtab->changed = true;
 
list_add_tail(&sec->list, &elf->sections);
elf_hash_add(elf->section_hash, &sec->hash, sec->idx);


[tip: x86/core] objtool: Handle per arch retpoline naming

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 3b652980a250c1ed9e0c361750f029781831cdc3
Gitweb:
https://git.kernel.org/tip/3b652980a250c1ed9e0c361750f029781831cdc3
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:04 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 11:36:52 +02:00

objtool: Handle per arch retpoline naming

The __x86_indirect_ naming is obviously not generic. Shorten to allow
matching some additional magic names later.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.630296...@infradead.org
---
 tools/objtool/arch/x86/decode.c  |  5 +
 tools/objtool/check.c|  9 +++--
 tools/objtool/include/objtool/arch.h |  2 ++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 8380d0b..e5fa3a5 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -645,3 +645,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 
sp_reg)
 
return 0;
 }
+
+bool arch_is_retpoline(struct symbol *sym)
+{
+   return !strncmp(sym->name, "__x86_indirect_", 15);
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 519af4b..6fbc001 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -850,6 +850,11 @@ static int add_ignore_alternatives(struct objtool_file 
*file)
return 0;
 }
 
+__weak bool arch_is_retpoline(struct symbol *sym)
+{
+   return false;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -872,7 +877,7 @@ static int add_jump_destinations(struct objtool_file *file)
} else if (reloc->sym->type == STT_SECTION) {
dest_sec = reloc->sym->sec;
dest_off = arch_dest_reloc_offset(reloc->addend);
-   } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+   } else if (arch_is_retpoline(reloc->sym)) {
/*
 * Retpoline jumps are really dynamic jumps in
 * disguise, so convert them accordingly.
@@ -1026,7 +1031,7 @@ static int add_call_destinations(struct objtool_file 
*file)
return -1;
}
 
-   } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21)) {
+   } else if (arch_is_retpoline(reloc->sym)) {
/*
 * Retpoline calls are really dynamic calls in
 * disguise, so convert them accordingly.
diff --git a/tools/objtool/include/objtool/arch.h 
b/tools/objtool/include/objtool/arch.h
index 6ff0685..bb30993 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -86,4 +86,6 @@ const char *arch_nop_insn(int len);
 
 int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
 
+bool arch_is_retpoline(struct symbol *sym);
+
 #endif /* _ARCH_H */


[tip: x86/core] objtool: Add elf_create_reloc() helper

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 7508e2958a82675e75e34221c26ad4242d4ef283
Gitweb:
https://git.kernel.org/tip/7508e2958a82675e75e34221c26ad4242d4ef283
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:07 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 12:55:55 +02:00

objtool: Add elf_create_reloc() helper

We have 4 instances of adding a relocation. Create a common helper
to avoid growing even more.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.817438...@infradead.org
---
 tools/objtool/check.c   | 78 +
 tools/objtool/elf.c | 86 ++--
 tools/objtool/include/objtool/elf.h | 10 ++-
 tools/objtool/orc_gen.c | 30 ++
 4 files changed, 85 insertions(+), 119 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 1d0415b..61fe29a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -433,8 +433,7 @@ reachable:
 
 static int create_static_call_sections(struct objtool_file *file)
 {
-   struct section *sec, *reloc_sec;
-   struct reloc *reloc;
+   struct section *sec;
struct static_call_site *site;
struct instruction *insn;
struct symbol *key_sym;
@@ -460,8 +459,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-   if (!reloc_sec)
+   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
return -1;
 
idx = 0;
@@ -471,25 +469,11 @@ static int create_static_call_sections(struct 
objtool_file *file)
memset(site, 0, sizeof(struct static_call_site));
 
/* populate reloc for 'addr' */
-   reloc = malloc(sizeof(*reloc));
-
-   if (!reloc) {
-   perror("malloc");
-   return -1;
-   }
-   memset(reloc, 0, sizeof(*reloc));
-
-   insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc);
-   if (!reloc->sym) {
-   WARN_FUNC("static call tramp: missing containing 
symbol",
- insn->sec, insn->offset);
+   if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(struct static_call_site),
+ R_X86_64_PC32,
+ insn->sec, insn->offset))
return -1;
-   }
-
-   reloc->type = R_X86_64_PC32;
-   reloc->offset = idx * sizeof(struct static_call_site);
-   reloc->sec = reloc_sec;
-   elf_add_reloc(file->elf, reloc);
 
/* find key symbol */
key_name = strdup(insn->call_dest->name);
@@ -526,18 +510,11 @@ static int create_static_call_sections(struct 
objtool_file *file)
free(key_name);
 
/* populate reloc for 'key' */
-   reloc = malloc(sizeof(*reloc));
-   if (!reloc) {
-   perror("malloc");
+   if (elf_add_reloc(file->elf, sec,
+ idx * sizeof(struct static_call_site) + 4,
+ R_X86_64_PC32, key_sym,
+ is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
return -1;
-   }
-   memset(reloc, 0, sizeof(*reloc));
-   reloc->sym = key_sym;
-   reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
-   reloc->type = R_X86_64_PC32;
-   reloc->offset = idx * sizeof(struct static_call_site) + 4;
-   reloc->sec = reloc_sec;
-   elf_add_reloc(file->elf, reloc);
 
idx++;
}
@@ -547,8 +524,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
 
 static int create_mcount_loc_sections(struct objtool_file *file)
 {
-   struct section *sec, *reloc_sec;
-   struct reloc *reloc;
+   struct section *sec;
unsigned long *loc;
struct instruction *insn;
int idx;
@@ -571,8 +547,7 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-   if (!reloc_sec)
+   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
return -1;
 
idx = 0;
@@ -581,32 +556,11 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
loc = (unsigned long *)sec->data->d_buf + idx;
memset(loc, 0, sizeof(unsigned long));
 
-  

[tip: x86/core] objtool: Implicitly create reloc sections

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: aef0f13e96db08f31be6b96d28e761df46d86ff4
Gitweb:
https://git.kernel.org/tip/aef0f13e96db08f31be6b96d28e761df46d86ff4
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:08 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 13:01:15 +02:00

objtool: Implicitly create reloc sections

Have elf_add_reloc() create the relocation section implicitly.

Suggested-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151259.880174...@infradead.org
---
 tools/objtool/check.c   |  6 --
 tools/objtool/elf.c |  9 -
 tools/objtool/include/objtool/elf.h |  1 -
 tools/objtool/orc_gen.c |  2 --
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 61fe29a..600fa67 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -459,9 +459,6 @@ static int create_static_call_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
-   return -1;
-
idx = 0;
list_for_each_entry(insn, >static_call_list, static_call_node) {
 
@@ -547,9 +544,6 @@ static int create_mcount_loc_sections(struct objtool_file 
*file)
if (!sec)
return -1;
 
-   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
-   return -1;
-
idx = 0;
list_for_each_entry(insn, >mcount_loc_list, mcount_loc_node) {
 
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 0ab52ac..7b65ae3 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -447,11 +447,18 @@ err:
return -1;
 }
 
+static struct section *elf_create_reloc_section(struct elf *elf,
+   struct section *base,
+   int reltype);
+
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
  unsigned int type, struct symbol *sym, int addend)
 {
struct reloc *reloc;
 
+   if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA))
+   return -1;
+
reloc = malloc(sizeof(*reloc));
if (!reloc) {
perror("malloc");
@@ -829,7 +836,7 @@ static struct section *elf_create_rela_reloc_section(struct 
elf *elf, struct sec
return sec;
 }
 
-struct section *elf_create_reloc_section(struct elf *elf,
+static struct section *elf_create_reloc_section(struct elf *elf,
 struct section *base,
 int reltype)
 {
diff --git a/tools/objtool/include/objtool/elf.h 
b/tools/objtool/include/objtool/elf.h
index 825ad32..463f329 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -122,7 +122,6 @@ static inline u32 reloc_hash(struct reloc *reloc)
 
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned 
int sh_flags, size_t entsize, int nr);
-struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
 
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
  unsigned int type, struct symbol *sym, int addend);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 1b57be6..dc9b7dd 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -225,8 +225,6 @@ int orc_create(struct objtool_file *file)
sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), 
nr);
if (!sec)
return -1;
-   if (!elf_create_reloc_section(file->elf, sec, SHT_RELA))
-   return -1;
 
/* Write ORC entries to sections: */
list_for_each_entry(entry, _list, list) {


[tip: x86/core] objtool: Skip magical retpoline .altinstr_replacement

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 68a59124f4c6363de619fea63231a97dd220a12c
Gitweb:
https://git.kernel.org/tip/68a59124f4c6363de619fea63231a97dd220a12c
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:14 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 13:29:40 +02:00

objtool: Skip magical retpoline .altinstr_replacement

When the .altinstr_replacement is a retpoline, skip the alternative.
We already special case retpolines anyway.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.259429...@infradead.org
---
 tools/objtool/special.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 2c7fbda..07b21cf 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf, struct 
special_entry *entry,
return -1;
}
 
+   /*
+* Skip retpoline .altinstr_replacement... we already rewrite the
+* instructions for retpolines anyway, see arch_is_retpoline()
+* usage in add_{call,jump}_destinations().
+*/
+   if (arch_is_retpoline(new_reloc->sym))
+   return 1;
+
alt->new_sec = new_reloc->sym->sec;
alt->new_off = (unsigned int)new_reloc->addend;
 
@@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, struct list_head 
*alts)
memset(alt, 0, sizeof(*alt));
 
ret = get_alt_entry(elf, entry, sec, idx, alt);
-   if (ret)
+   if (ret > 0)
+   continue;
+   if (ret < 0)
return ret;
 
list_add_tail(>list, alts);


[tip: x86/core] objtool: Add elf_create_undef_symbol()

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 993b477acdb652c6134e5faae05e8a378911cbb3
Gitweb:
https://git.kernel.org/tip/993b477acdb652c6134e5faae05e8a378911cbb3
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:11 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 13:12:48 +02:00

objtool: Add elf_create_undef_symbol()

Allow objtool to create undefined symbols; this allows creating
relocations to symbols not currently in the symbol table.
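
A usage sketch, pairing it with elf_add_reloc(); the symbol name is only an
example of something defined in another object:

  struct symbol *sym;

  sym = elf_create_undef_symbol(file->elf, "some_external_symbol");
  if (!sym)
  	return -1;

  /* relocations against the new symbol now work as usual */
  if (elf_add_reloc(file->elf, sec, offset, R_X86_64_PC32, sym, 0))
  	return -1;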

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.064743...@infradead.org
---
 tools/objtool/elf.c | 60 -
 tools/objtool/include/objtool/elf.h |  1 +-
 2 files changed, 61 insertions(+)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 8457218..d08f5f3 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -715,6 +715,66 @@ static int elf_add_string(struct elf *elf, struct section 
*strtab, char *str)
return len;
 }
 
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+{
+   struct section *symtab;
+   struct symbol *sym;
+   Elf_Data *data;
+   Elf_Scn *s;
+
+   sym = malloc(sizeof(*sym));
+   if (!sym) {
+   perror("malloc");
+   return NULL;
+   }
+   memset(sym, 0, sizeof(*sym));
+
+   sym->name = strdup(name);
+
+   sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+   if (sym->sym.st_name == -1)
+   return NULL;
+
+   sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
+   // st_other 0
+   // st_shndx 0
+   // st_value 0
+   // st_size 0
+
+   symtab = find_section_by_name(elf, ".symtab");
+   if (!symtab) {
+   WARN("can't find .symtab");
+   return NULL;
+   }
+
+   s = elf_getscn(elf->elf, symtab->idx);
+   if (!s) {
+   WARN_ELF("elf_getscn");
+   return NULL;
+   }
+
+   data = elf_newdata(s);
+   if (!data) {
+   WARN_ELF("elf_newdata");
+   return NULL;
+   }
+
+   data->d_buf = &sym->sym;
+   data->d_size = sizeof(sym->sym);
+   data->d_align = 1;
+
+   sym->idx = symtab->len / sizeof(sym->sym);
+
+   symtab->len += data->d_size;
+   symtab->changed = true;
+
+   sym->sec = find_section_by_index(elf, 0);
+
+   elf_add_symbol(elf, sym);
+
+   return sym;
+}
+
 struct section *elf_create_section(struct elf *elf, const char *name,
   unsigned int sh_flags, size_t entsize, int 
nr)
 {
diff --git a/tools/objtool/include/objtool/elf.h 
b/tools/objtool/include/objtool/elf.h
index 463f329..45e5ede 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -133,6 +133,7 @@ int elf_write_insn(struct elf *elf, struct section *sec,
   unsigned long offset, unsigned int len,
   const char *insn);
 int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
 int elf_write(struct elf *elf);
 void elf_close(struct elf *elf);
 


[tip: x86/core] objtool: Keep track of retpoline call sites

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 7e57a6bc5a22145429d3a232619b0637c312397a
Gitweb:
https://git.kernel.org/tip/7e57a6bc5a22145429d3a232619b0637c312397a
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:12 +01:00
Committer: Borislav Petkov 
CommitterDate: Thu, 01 Apr 2021 13:20:21 +02:00

objtool: Keep track of retpoline call sites

Provide infrastructure for architectures to rewrite/augment compiler
generated retpoline calls. Similar to what we do for static_call()s,
keep track of the instructions that are retpoline calls.

Use the same list_head, since a retpoline call cannot also be a
static_call.
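
With the list in place, an architecture hook only has to walk it; a sketch
of what an implementation might look like (the weak default in this patch
does nothing):

  int arch_rewrite_retpolines(struct objtool_file *file)
  {
  	struct instruction *insn;

  	list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
  		/* rewrite or annotate this retpoline call site */
  	}

  	return 0;
  }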

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Reviewed-by: Miroslav Benes 
Link: https://lkml.kernel.org/r/20210326151300.130805...@infradead.org
---
 tools/objtool/check.c   | 34 
 tools/objtool/include/objtool/arch.h|  2 +-
 tools/objtool/include/objtool/check.h   |  2 +-
 tools/objtool/include/objtool/objtool.h |  1 +-
 tools/objtool/objtool.c |  1 +-
 5 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 600fa67..77074db 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -451,7 +451,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
return 0;
 
idx = 0;
-   list_for_each_entry(insn, &file->static_call_list, static_call_node)
+   list_for_each_entry(insn, &file->static_call_list, call_node)
idx++;
 
sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
@@ -460,7 +460,7 @@ static int create_static_call_sections(struct objtool_file 
*file)
return -1;
 
idx = 0;
-   list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+   list_for_each_entry(insn, &file->static_call_list, call_node) {
 
site = (struct static_call_site *)sec->data->d_buf + idx;
memset(site, 0, sizeof(struct static_call_site));
@@ -829,13 +829,16 @@ static int add_jump_destinations(struct objtool_file 
*file)
else
insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
 
+   list_add_tail(&insn->call_node,
+ &file->retpoline_call_list);
+
insn->retpoline_safe = true;
continue;
} else if (insn->func) {
/* internal or external sibling call (with reloc) */
insn->call_dest = reloc->sym;
if (insn->call_dest->static_call_tramp) {
-   list_add_tail(&insn->static_call_node,
+   list_add_tail(&insn->call_node,
  &file->static_call_list);
}
continue;
@@ -897,7 +900,7 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call (without reloc) */
insn->call_dest = insn->jump_dest->func;
if (insn->call_dest->static_call_tramp) {
-   list_add_tail(&insn->static_call_node,
+   list_add_tail(&insn->call_node,
  &file->static_call_list);
}
}
@@ -981,6 +984,9 @@ static int add_call_destinations(struct objtool_file *file)
insn->type = INSN_CALL_DYNAMIC;
insn->retpoline_safe = true;
 
+   list_add_tail(&insn->call_node,
+ &file->retpoline_call_list);
+
remove_insn_ops(insn);
continue;
 
@@ -988,7 +994,7 @@ static int add_call_destinations(struct objtool_file *file)
insn->call_dest = reloc->sym;
 
if (insn->call_dest && insn->call_dest->static_call_tramp) {
-   list_add_tail(&insn->static_call_node,
+   list_add_tail(&insn->call_node,
  &file->static_call_list);
}
 
@@ -1714,6 +1720,11 @@ static void mark_rodata(struct objtool_file *file)
file->rodata = found;
 }
 
+__weak int arch_rewrite_retpolines(struct objtool_file *file)
+{
+   return 0;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
int ret;
@@ -1742,6 +1753,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
 
+   /*
+* Must be before add_special_section_alts() as that depends on
+* jump_dest being set.
+*/
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -1778,6 +1793,15 @@ static int 

[tip: x86/core] x86/retpoline: Simplify retpolines

2021-04-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/core branch of tip:

Commit-ID: 2077915516ebb06d36e03cb542ccb833a8b0a3eb
Gitweb:
https://git.kernel.org/tip/2077915516ebb06d36e03cb542ccb833a8b0a3eb
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Mar 2021 16:12:02 +01:00
Committer: Borislav Petkov 
CommitterDate: Wed, 31 Mar 2021 22:31:57 +02:00

x86/retpoline: Simplify retpolines

Due to

  c9c324dc22aa ("objtool: Support stack layout changes in alternatives")

it is possible to simplify the retpolines.

Currently our retpolines consist of 2 symbols:

 - __x86_indirect_thunk_\reg: the compiler target
 - __x86_retpoline_\reg:  the actual retpoline.

Both are consecutive in code and aligned such that for any one register
they both live in the same cacheline:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0   jmpq   *%rax
   2:   90  nop
   3:   90  nop
   4:   90  nop

  0000000000000005 <__x86_retpoline_rax>:
   5:   e8 07 00 00 00  callq  11 <__x86_retpoline_rax+0xc>
   a:   f3 90   pause
   c:   0f ae e8        lfence
   f:   eb f9   jmp    a <__x86_retpoline_rax+0x5>
  11:   48 89 04 24     mov    %rax,(%rsp)
  15:   c3      retq
  16:   66 2e 0f 1f 84 00 00 00 00 00   nopw   %cs:0x0(%rax,%rax,1)

The thunk is an alternative_2, where one option is a jmp to the
retpoline. This was done so that objtool didn't need to deal with
alternatives with stack ops. But that problem has been solved, so now
it is possible to fold the entire retpoline into the alternative to
simplify and consolidate unused bytes:

  0000000000000000 <__x86_indirect_thunk_rax>:
   0:   ff e0   jmpq   *%rax
   2:   90  nop
   3:   90  nop
   4:   90  nop
   5:   90  nop
   6:   90  nop
   7:   90  nop
   8:   90  nop
   9:   90  nop
   a:   90  nop
   b:   90  nop
   c:   90  nop
   d:   90  nop
   e:   90  nop
   f:   90  nop
  10:   90  nop
  11:   66 66 2e 0f 1f 84 00 00 00 00 00   data16 nopw %cs:0x0(%rax,%rax,1)
  1c:   0f 1f 40 00 nopl   0x0(%rax)

Notice that since the longest alternative sequence is now:

   0:   e8 07 00 00 00  callq  c <.altinstr_replacement+0xc>
   5:   f3 90   pause
   7:   0f ae e8        lfence
   a:   eb f9   jmp    5 <.altinstr_replacement+0x5>
   c:   48 89 04 24     mov    %rax,(%rsp)
  10:   c3      retq

17 bytes, we have 15 bytes NOP at the end of our 32 byte slot. (IOW, if
we can shrink the retpoline by 1 byte we can pack it more densely).

 [ bp: Massage commit message. ]

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20210326151259.506071...@infradead.org
---
 arch/x86/include/asm/asm-prototypes.h |  7 +-
 arch/x86/include/asm/nospec-branch.h  |  6 ++---
 arch/x86/lib/retpoline.S  | 34 +-
 tools/objtool/check.c |  3 +--
 4 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/asm-prototypes.h 
b/arch/x86/include/asm/asm-prototypes.h
index 51e2bf2..0545b07 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -22,15 +22,8 @@ extern void cmpxchg8b_emu(void);
 #define DECL_INDIRECT_THUNK(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
 
-#define DECL_RETPOLINE(reg) \
-   extern asmlinkage void __x86_retpoline_ ## reg (void);
-
 #undef GEN
 #define GEN(reg) DECL_INDIRECT_THUNK(reg)
 #include 
 
-#undef GEN
-#define GEN(reg) DECL_RETPOLINE(reg)
-#include 
-
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h 
b/arch/x86/include/asm/nospec-branch.h
index 529f8e9..664be73 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -80,7 +80,7 @@
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(jmp __x86_indirect_thunk_\reg), X86_FEATURE_RETPOLINE, \
  __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), 
X86_FEATURE_RETPOLINE_AMD
 #else
jmp *%\reg
@@ -90,7 +90,7 @@
 .macro CALL_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
- __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(call 

[tip: locking/urgent] static_call: Align static_call_is_init() patching condition

2021-03-19 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: 698bacefe993ad2922c9d3b1380591ad489355e9
Gitweb:
https://git.kernel.org/tip/698bacefe993ad2922c9d3b1380591ad489355e9
Author:Peter Zijlstra 
AuthorDate:Thu, 18 Mar 2021 11:29:56 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 19 Mar 2021 13:16:44 +01:00

static_call: Align static_call_is_init() patching condition

The intent is to avoid writing init code after init (because the text
might have been freed). The code is needlessly different between
jump_label and static_call and not obviously correct.

The existing code relies on the fact that the module loader clears the
init layout, such that within_module_init() always fails, while
jump_label relies on the module state which is more obvious and
matches the kernel logic.
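
Condensed, the loop body now computes one 'init' predicate per sites-chain
entry (a sketch of the net effect):

  bool init = system_state < SYSTEM_RUNNING;	/* built-in: still booting? */

  if (mod)
  	init = mod->state == MODULE_STATE_COMING;	/* module: still loading? */

  ...

  if (!init && static_call_is_init(site))
  	continue;	/* init text may already be freed; don't patch */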

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Jarkko Sakkinen 
Tested-by: Sumit Garg 
Link: https://lkml.kernel.org/r/20210318113610.636651...@infradead.org
---
 kernel/static_call.c | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/kernel/static_call.c b/kernel/static_call.c
index 080c8a9..fc22590 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -149,6 +149,7 @@ void __static_call_update(struct static_call_key *key, void 
*tramp, void *func)
};
 
for (site_mod = &first; site_mod; site_mod = site_mod->next) {
+   bool init = system_state < SYSTEM_RUNNING;
struct module *mod = site_mod->mod;
 
if (!site_mod->sites) {
@@ -168,6 +169,7 @@ void __static_call_update(struct static_call_key *key, void 
*tramp, void *func)
if (mod) {
stop = mod->static_call_sites +
   mod->num_static_call_sites;
+   init = mod->state == MODULE_STATE_COMING;
}
 #endif
 
@@ -175,16 +177,8 @@ void __static_call_update(struct static_call_key *key, 
void *tramp, void *func)
 site < stop && static_call_key(site) == key; site++) {
void *site_addr = static_call_addr(site);
 
-   if (static_call_is_init(site)) {
-   /*
-* Don't write to call sites which were in
-* initmem and have since been freed.
-*/
-   if (!mod && system_state >= SYSTEM_RUNNING)
-   continue;
-   if (mod && !within_module_init((unsigned long)site_addr, mod))
-   continue;
-   }
+   if (!init && static_call_is_init(site))
+   continue;
 
if (!kernel_text_address((unsigned long)site_addr)) {
WARN_ONCE(1, "can't patch static call site at 
%pS",


[tip: locking/urgent] static_call: Fix static_call_update() sanity check

2021-03-19 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: 38c93587375053c5b9ef093f4a5ea754538cba32
Gitweb:
https://git.kernel.org/tip/38c93587375053c5b9ef093f4a5ea754538cba32
Author:Peter Zijlstra 
AuthorDate:Thu, 18 Mar 2021 11:31:51 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 19 Mar 2021 13:16:44 +01:00

static_call: Fix static_call_update() sanity check

Sites that match init_section_contains() get marked as INIT. For
built-in code init_sections contains both __init and __exit text. OTOH
kernel_text_address() only explicitly includes __init text (and there
are no __exit text markers).

Match what jump_label already does and ignore the warning for INIT
sites. Also see the excellent changelog for commit: 8f35eaa5f2de
("jump_label: Don't warn on __exit jump entries")

Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure")
Reported-by: Sumit Garg 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Jarkko Sakkinen 
Tested-by: Sumit Garg 
Link: https://lkml.kernel.org/r/20210318113610.739542...@infradead.org
---
 kernel/jump_label.c  |  8 
 kernel/static_call.c | 11 ++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index c6a39d6..ba39fbb 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -407,6 +407,14 @@ static bool jump_label_can_update(struct jump_entry 
*entry, bool init)
return false;
 
if (!kernel_text_address(jump_entry_code(entry))) {
+   /*
+* This skips patching built-in __exit, which
+* is part of init_section_contains() but is
+* not part of kernel_text_address().
+*
+* Skipping built-in __exit is fine since it
+* will never be executed.
+*/
WARN_ONCE(!jump_entry_is_init(entry),
  "can't patch jump_label at %pS",
  (void *)jump_entry_code(entry));
diff --git a/kernel/static_call.c b/kernel/static_call.c
index fc22590..2c5950b 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -181,7 +181,16 @@ void __static_call_update(struct static_call_key *key, 
void *tramp, void *func)
continue;
 
if (!kernel_text_address((unsigned long)site_addr)) {
-   WARN_ONCE(1, "can't patch static call site at 
%pS",
+   /*
+* This skips patching built-in __exit, which
+* is part of init_section_contains() but is
+* not part of kernel_text_address().
+*
+* Skipping built-in __exit is fine since it
+* will never be executed.
+*/
+   WARN_ONCE(!static_call_is_init(site),
+ "can't patch static call site at %pS",
  site_addr);
continue;
}


[tip: locking/urgent] static_call: Fix static_call_set_init()

2021-03-19 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: 68b1eddd421d2b16c6655eceb48918a1e896bbbc
Gitweb:
https://git.kernel.org/tip/68b1eddd421d2b16c6655eceb48918a1e896bbbc
Author:Peter Zijlstra 
AuthorDate:Thu, 18 Mar 2021 11:27:19 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 19 Mar 2021 13:16:44 +01:00

static_call: Fix static_call_set_init()

It turns out that static_call_set_init() does not preserve the other
flags; IOW. it clears TAIL if it was set.
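
To see the bug, take a site whose key has STATIC_CALL_SITE_TAIL set (flag
values as in static_call_types.h: TAIL == 1, INIT == 2, FLAGS == 3; 0x1000
is an arbitrary example key):

  unsigned long key = 0x1000 | STATIC_CALL_SITE_TAIL;

  /* old: static_call_key() masks off *all* flag bits first,
   * so TAIL is lost: old == 0x1000 | INIT */
  unsigned long old = ((key & ~STATIC_CALL_SITE_FLAGS) | STATIC_CALL_SITE_INIT);

  /* new: __static_call_key() keeps the flags intact:
   * new == 0x1000 | TAIL | INIT */
  unsigned long new = (key | STATIC_CALL_SITE_INIT);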

Fixes: 9183c3f9ed710 ("static_call: Add inline static call infrastructure")
Reported-by: Sumit Garg 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Jarkko Sakkinen 
Tested-by: Sumit Garg 
Link: https://lkml.kernel.org/r/20210318113610.519406...@infradead.org
---
 kernel/static_call.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/kernel/static_call.c b/kernel/static_call.c
index ae82529..080c8a9 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -35,27 +35,30 @@ static inline void *static_call_addr(struct static_call_site *site)
	return (void *)((long)site->addr + (long)&site->addr);
 }
 
+static inline unsigned long __static_call_key(const struct static_call_site *site)
+{
+   return (long)site->key + (long)&site->key;
+}
 
 static inline struct static_call_key *static_call_key(const struct static_call_site *site)
 {
-   return (struct static_call_key *)
-   (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS);
+   return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS);
 }
 
 /* These assume the key is word-aligned. */
 static inline bool static_call_is_init(struct static_call_site *site)
 {
-   return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT;
+   return __static_call_key(site) & STATIC_CALL_SITE_INIT;
 }
 
 static inline bool static_call_is_tail(struct static_call_site *site)
 {
-   return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL;
+   return __static_call_key(site) & STATIC_CALL_SITE_TAIL;
 }
 
 static inline void static_call_set_init(struct static_call_site *site)
 {
-   site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) -
+   site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) -
    (long)&site->key;
 }
 
@@ -190,7 +193,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
}
 
arch_static_call_transform(site_addr, NULL, func,
-   static_call_is_tail(site));
+  static_call_is_tail(site));
}
}
 
@@ -349,7 +352,7 @@ static int static_call_add_module(struct module *mod)
struct static_call_site *site;
 
for (site = start; site != stop; site++) {
-   unsigned long s_key = (long)site->key + (long)&site->key;
+   unsigned long s_key = __static_call_key(site);
unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
unsigned long key;
 

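All the helpers above manipulate one encoding: ->key holds a relative offset to the key, with INIT and TAIL stuffed into the two low bits that word alignment of the keys leaves free. The bug was re-encoding through the masked form and thereby dropping TAIL. A stand-alone round-trip sketch in plain C (hypothetical flag values, not the kernel structures):

  #include <assert.h>

  #define SITE_INIT  1L
  #define SITE_TAIL  2L
  #define SITE_FLAGS 3L

  struct site { long key; };  /* relative offset to the key, plus flag bits */

  /* absolute value, flags still included (analog of __static_call_key()) */
  static long site_key_abs(struct site *s)
  {
      return s->key + (long)&s->key;
  }

  int main(void)
  {
      static long real_key;   /* word aligned, so the low two bits are free */
      struct site s;

      /* encode: OR the flag into the absolute address, store it relative */
      s.key = ((long)&real_key | SITE_TAIL) - (long)&s.key;

      /* decode: masking recovers the address, and the flag survives */
      assert((site_key_abs(&s) & ~SITE_FLAGS) == (long)&real_key);
      assert(site_key_abs(&s) & SITE_TAIL);
      return 0;
  }

Re-encoding via the masked static_call_key() value, as the old static_call_set_init() did, throws the TAIL bit away before the subtraction; going through the flag-preserving __static_call_key() keeps it.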

[tip: irq/core] tasklets: Replace spin wait in tasklet_unlock_wait()

2021-03-17 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the irq/core branch of tip:

Commit-ID: da044747401fc16202e223c9da970ed4e84fd84d
Gitweb:
https://git.kernel.org/tip/da044747401fc16202e223c9da970ed4e84fd84d
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Mar 2021 09:42:08 +01:00
Committer: Thomas Gleixner 
CommitterDate: Wed, 17 Mar 2021 16:33:55 +01:00

tasklets: Replace spin wait in tasklet_unlock_wait()

tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This
is wasting CPU cycles in a tight loop which is especially painful in a
guest when the CPU running the tasklet is scheduled out.

tasklet_unlock_wait() is invoked from tasklet_kill() which is used in
teardown paths and not performance critical at all. Replace the spin wait
with wait_var_event().

There are no users of tasklet_unlock_wait() which are invoked from atomic
contexts. The usage in tasklet_disable() has been replaced temporarily with
the spin waiting variant until the atomic users are fixed up and will be
converted to the sleep wait variant later.

Signed-off-by: Peter Zijlstra 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lore.kernel.org/r/20210309084241.783936...@linutronix.de

---
 include/linux/interrupt.h | 13 ++---
 kernel/softirq.c  | 18 ++
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index b7f0012..b50be4f 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -664,17 +664,8 @@ static inline int tasklet_trylock(struct tasklet_struct *t)
return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
 }
 
-static inline void tasklet_unlock(struct tasklet_struct *t)
-{
-   smp_mb__before_atomic();
-   clear_bit(TASKLET_STATE_RUN, &(t)->state);
-}
-
-static inline void tasklet_unlock_wait(struct tasklet_struct *t)
-{
-   while (test_bit(TASKLET_STATE_RUN, &(t)->state))
-   cpu_relax();
-}
+void tasklet_unlock(struct tasklet_struct *t);
+void tasklet_unlock_wait(struct tasklet_struct *t);
 
 /*
  * Do not use in new code. Waiting for tasklets from atomic contexts is
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8d56bbf..ef6429a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -25,6 +25,7 @@
 #include <linux/smpboot.h>
 #include <linux/tick.h>
 #include <linux/irq.h>
+#include <linux/wait_bit.h>
 
 #include <asm/softirq_stack.h>
 
@@ -632,6 +633,23 @@ void tasklet_kill(struct tasklet_struct *t)
 }
 EXPORT_SYMBOL(tasklet_kill);
 
+#ifdef CONFIG_SMP
+void tasklet_unlock(struct tasklet_struct *t)
+{
+   smp_mb__before_atomic();
+   clear_bit(TASKLET_STATE_RUN, &t->state);
+   smp_mb__after_atomic();
+   wake_up_var(&t->state);
+}
+EXPORT_SYMBOL_GPL(tasklet_unlock);
+
+void tasklet_unlock_wait(struct tasklet_struct *t)
+{
+   wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
+}
+EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
+#endif
+
 void __init softirq_init(void)
 {
int cpu;

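In the kernel the new tasklet_unlock() is lockless: clear_bit(), then smp_mb__after_atomic() to order the clear against the waiter's condition check, then wake_up_var() on the hashed variable waitqueue. A rough user-space analog of the spin-to-sleep conversion, with a mutex and condvar standing in for wait_var_event()/wake_up_var() (a sketch of the pattern, not kernel code):

  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
  static bool run = true;                /* analog of TASKLET_STATE_RUN */

  /* analog of the new tasklet_unlock(): clear the state, then wake waiters */
  static void *unlock_side(void *arg)
  {
      (void)arg;
      pthread_mutex_lock(&lock);
      run = false;
      pthread_cond_broadcast(&cond);
      pthread_mutex_unlock(&lock);
      return NULL;
  }

  /* analog of the new tasklet_unlock_wait(): sleep instead of cpu_relax() */
  static void wait_side(void)
  {
      pthread_mutex_lock(&lock);
      while (run)
          pthread_cond_wait(&cond, &lock);
      pthread_mutex_unlock(&lock);
  }

  int main(void)
  {
      pthread_t t;

      pthread_create(&t, NULL, unlock_side, NULL);
      wait_side();                       /* returns once 'run' is cleared */
      pthread_join(t, NULL);
      puts("unlocked");
      return 0;
  }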

[tip: irq/core] tasklets: Replace spin wait in tasklet_kill()

2021-03-17 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the irq/core branch of tip:

Commit-ID: 697d8c63c4a2991a22a896a5e6adcdbb28fefe56
Gitweb:
https://git.kernel.org/tip/697d8c63c4a2991a22a896a5e6adcdbb28fefe56
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Mar 2021 09:42:09 +01:00
Committer: Thomas Gleixner 
CommitterDate: Wed, 17 Mar 2021 16:33:57 +01:00

tasklets: Replace spin wait in tasklet_kill()

tasklet_kill() spin waits for TASKLET_STATE_SCHED to be cleared invoking
yield() from inside the loop. yield() is an ill defined mechanism and the
result might still be wasting CPU cycles in a tight loop which is
especially painful in a guest when the CPU running the tasklet is scheduled
out.

tasklet_kill() is used in teardown paths and not performance critical at
all. Replace the spin wait with wait_var_event().

Signed-off-by: Peter Zijlstra 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lore.kernel.org/r/20210309084241.890532...@linutronix.de

---
 kernel/softirq.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index ef6429a..ba89ca7 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -532,10 +532,12 @@ void __tasklet_hi_schedule(struct tasklet_struct *t)
 }
 EXPORT_SYMBOL(__tasklet_hi_schedule);
 
-static bool tasklet_should_run(struct tasklet_struct *t)
+static bool tasklet_clear_sched(struct tasklet_struct *t)
 {
-   if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
+   if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
+   wake_up_var(&t->state);
return true;
+   }
 
WARN_ONCE(1, "tasklet SCHED state not set: %s %pS\n",
  t->use_callback ? "callback" : "func",
@@ -563,7 +565,7 @@ static void tasklet_action_common(struct softirq_action *a,
 
if (tasklet_trylock(t)) {
if (!atomic_read(&t->count)) {
-   if (tasklet_should_run(t)) {
+   if (tasklet_clear_sched(t)) {
if (t->use_callback)
t->callback(t);
else
@@ -623,13 +625,11 @@ void tasklet_kill(struct tasklet_struct *t)
if (in_interrupt())
pr_notice("Attempt to kill tasklet from interrupt\n");
 
-   while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
-   do {
-   yield();
-   } while (test_bit(TASKLET_STATE_SCHED, &t->state));
-   }
+   while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
+   wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));
+
tasklet_unlock_wait(t);
-   clear_bit(TASKLET_STATE_SCHED, &t->state);
+   tasklet_clear_sched(t);
 }
 EXPORT_SYMBOL(tasklet_kill);
 


[tip: x86/cpu] x86: Remove dynamic NOP selection

2021-03-15 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: a89dfde3dc3c2dbf56910af75e2d8b11ec5308f6
Gitweb:
https://git.kernel.org/tip/a89dfde3dc3c2dbf56910af75e2d8b11ec5308f6
Author:Peter Zijlstra 
AuthorDate:Fri, 12 Mar 2021 12:32:54 +01:00
Committer: Borislav Petkov 
CommitterDate: Mon, 15 Mar 2021 16:24:59 +01:00

x86: Remove dynamic NOP selection

This ensures that a NOP is a NOP and not a random other instruction that
is also a NOP. It allows simplification of dynamic code patching that
wants to verify existing code before writing new instructions (ftrace,
jump_label, static_call, etc..).

Differentiating on NOPs is not a feature.

This pessimises 32bit (DONTCARE) and 32bit on 64bit CPUs (CARELESS).
32bit is not a performance target.

Everything x86_64 since AMD K10 (2007) and Intel IvyBridge (2012) is
fine with using NOPL (as opposed to prefix NOP). And per FEATURE_NOPL
being required for x86_64, all x86_64 CPUs can use NOPL. So stop
caring about NOPs, simplify things and get on with life.

[ The problem seems to be that some uarchs can only decode NOPL on a
single front-end port while others have severe decode penalties for
excessive prefixes. All modern uarchs can handle both, except Atom,
which has prefix penalties. ]

[ Also, much doubt you can actually measure any of this on normal
workloads. ]

After this, FEATURE_NOPL is unused except for required-features for
x86_64. FEATURE_K8 is only used for PTI.

 [ bp: Kernel build measurements showed ~0.3s slowdown on Sandybridge
   which is hardly a slowdown. Get rid of X86_FEATURE_K7, while at it. ]

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Acked-by: Alexei Starovoitov  # bpf
Acked-by: Linus Torvalds 
Link: https://lkml.kernel.org/r/20210312115749.065275...@infradead.org
---
 arch/x86/include/asm/cpufeatures.h   |   2 +-
 arch/x86/include/asm/jump_label.h|  12 +--
 arch/x86/include/asm/nops.h  | 176 +++
 arch/x86/include/asm/special_insns.h |   4 +-
 arch/x86/kernel/alternative.c| 198 ++
 arch/x86/kernel/cpu/amd.c|   5 +-
 arch/x86/kernel/ftrace.c |   4 +-
 arch/x86/kernel/jump_label.c |  32 +
 arch/x86/kernel/kprobes/core.c   |   2 +-
 arch/x86/kernel/setup.c  |   1 +-
 arch/x86/kernel/static_call.c|   4 +-
 arch/x86/net/bpf_jit_comp.c  |   8 +-
 12 files changed, 97 insertions(+), 351 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index cc96e26..8afa318 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -84,7 +84,7 @@
 
 /* CPU types for specific tunings: */
 #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+/* FREE, was #define X86_FEATURE_K7( 3*32+ 5) "" Athlon */
 #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC   ( 3*32+ 8) /* TSC ticks at a constant rate */
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 06c3cc2..5ce342b 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -6,12 +6,6 @@
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-#ifdef CONFIG_X86_64
-# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC
-#else
-# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
-#endif
-
 #include <asm/asm.h>
 #include <asm/nops.h>
 
@@ -23,7 +17,7 @@
 static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
asm_volatile_goto("1:"
-   ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
+   ".byte " __stringify(BYTES_NOP5) "\n\t"
".pushsection __jump_table,  \"aw\" \n\t"
_ASM_ALIGN "\n\t"
".long 1b - ., %l[l_yes] - . \n\t"
@@ -63,7 +57,7 @@ l_yes:
.long   \target - .Lstatic_jump_after_\@
 .Lstatic_jump_after_\@:
.else
-   .byte   STATIC_KEY_INIT_NOP
+   .byte   BYTES_NOP5
.endif
.pushsection __jump_table, "aw"
_ASM_ALIGN
@@ -75,7 +69,7 @@ l_yes:
 .macro STATIC_JUMP_IF_FALSE target, key, def
 .Lstatic_jump_\@:
.if \def
-   .byte   STATIC_KEY_INIT_NOP
+   .byte   BYTES_NOP5
.else
/* Equivalent to "jmp.d32 \target" */
.byte   0xe9
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 12f12b5..c1e5e81 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -4,89 +4,58 @@
 
 /*
  * Define nops for use with alternative() and for tracing.
- *
- * *_NOP5_ATOMIC must be a single instruction.
  */
 
-#define NOP_DS_PREFIX 0x3e
+#ifndef CONFIG_64BIT
 
-/* generic versions from gas
-   1: nop
-   the following instructions are 

[tip: x86/cpu] objtool/x86: Use asm/nops.h

2021-03-15 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID: 301cddc21a157a3072d789a3097857202e550a24
Gitweb:
https://git.kernel.org/tip/301cddc21a157a3072d789a3097857202e550a24
Author:Peter Zijlstra 
AuthorDate:Fri, 12 Mar 2021 12:32:55 +01:00
Committer: Borislav Petkov 
CommitterDate: Mon, 15 Mar 2021 16:37:37 +01:00

objtool/x86: Use asm/nops.h

Since the kernel will rely on a single canonical set of NOPs, make sure
objtool uses the exact same ones.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Borislav Petkov 
Link: https://lkml.kernel.org/r/20210312115749.136357...@infradead.org
---
 tools/arch/x86/include/asm/nops.h | 81 ++-
 tools/objtool/arch/x86/decode.c   | 13 +++--
 tools/objtool/sync-check.sh   |  1 +-
 3 files changed, 90 insertions(+), 5 deletions(-)
 create mode 100644 tools/arch/x86/include/asm/nops.h

diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
new file mode 100644
index 000..c1e5e81
--- /dev/null
+++ b/tools/arch/x86/include/asm/nops.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_NOPS_H
+#define _ASM_X86_NOPS_H
+
+/*
+ * Define nops for use with alternative() and for tracing.
+ */
+
+#ifndef CONFIG_64BIT
+
+/*
+ * Generic 32bit nops from GAS:
+ *
+ * 1: nop
+ * 2: movl %esi,%esi
+ * 3: leal 0x0(%esi),%esi
+ * 4: leal 0x0(%esi,%eiz,1),%esi
+ * 5: leal %ds:0x0(%esi,%eiz,1),%esi
+ * 6: leal 0x0(%esi),%esi
+ * 7: leal 0x0(%esi,%eiz,1),%esi
+ * 8: leal %ds:0x0(%esi,%eiz,1),%esi
+ *
+ * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2
+ * nop instructions.
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x89,0xf6
+#define BYTES_NOP3 0x8d,0x76,0x00
+#define BYTES_NOP4 0x8d,0x74,0x26,0x00
+#define BYTES_NOP5 0x3e,BYTES_NOP4
+#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
+#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x3e,BYTES_NOP7
+
+#else
+
+/*
+ * Generic 64bit nops from GAS:
+ *
+ * 1: nop
+ * 2: osp nop
+ * 3: nopl (%eax)
+ * 4: nopl 0x00(%eax)
+ * 5: nopl 0x00(%eax,%eax,1)
+ * 6: osp nopl 0x00(%eax,%eax,1)
+ * 7: nopl 0x00000000(%eax)
+ * 8: nopl 0x00000000(%eax,%eax,1)
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x66,BYTES_NOP1
+#define BYTES_NOP3 0x0f,0x1f,0x00
+#define BYTES_NOP4 0x0f,0x1f,0x40,0x00
+#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00
+#define BYTES_NOP6 0x66,BYTES_NOP5
+#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+
+#endif /* CONFIG_64BIT */
+
+#ifdef __ASSEMBLY__
+#define _ASM_MK_NOP(x) .byte x
+#else
+#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
+#endif
+
+#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+
+#define ASM_NOP_MAX 8
+
+#ifndef __ASSEMBLY__
+extern const unsigned char * const x86_nops[];
+#endif
+
+#endif /* _ASM_X86_NOPS_H */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 549813c..c117bfc 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,6 +11,9 @@
 #include "../../../arch/x86/lib/inat.c"
 #include "../../../arch/x86/lib/insn.c"
 
+#define CONFIG_64BIT 1
+#include <asm/nops.h>
+
 #include <asm/orc_types.h>
 #include <objtool/check.h>
 #include <objtool/elf.h>
@@ -596,11 +599,11 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
 const char *arch_nop_insn(int len)
 {
static const char nops[5][5] = {
-   /* 1 */ { 0x90 },
-   /* 2 */ { 0x66, 0x90 },
-   /* 3 */ { 0x0f, 0x1f, 0x00 },
-   /* 4 */ { 0x0f, 0x1f, 0x40, 0x00 },
-   /* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+   { BYTES_NOP1 },
+   { BYTES_NOP2 },
+   { BYTES_NOP3 },
+   { BYTES_NOP4 },
+   { BYTES_NOP5 },
};
 
if (len < 1 || len > 5) {
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 606a4b5..d232686 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -10,6 +10,7 @@ FILES="include/linux/objtool.h"
 
 if [ "$SRCARCH" = "x86" ]; then
 FILES="$FILES
+arch/x86/include/asm/nops.h
 arch/x86/include/asm/inat_types.h
 arch/x86/include/asm/orc_types.h
 arch/x86/include/asm/emulate_prefix.h

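The comma-separated BYTES_NOPn lists are what let a single header serve both consumers: behind _ASM_MK_NOP they become .byte directives, and between braces they become C array initializers, which is exactly how arch_nop_insn() uses them above. A stand-alone illustration, with the 64-bit values copied from the header:

  #include <stdio.h>

  /* 64-bit NOP byte lists as in the header above */
  #define BYTES_NOP1 0x90
  #define BYTES_NOP2 0x66,BYTES_NOP1
  #define BYTES_NOP3 0x0f,0x1f,0x00
  #define BYTES_NOP4 0x0f,0x1f,0x40,0x00
  #define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00

  /* braces turn the same lists into array initializers */
  static const unsigned char nops[5][5] = {
      { BYTES_NOP1 },
      { BYTES_NOP2 },
      { BYTES_NOP3 },
      { BYTES_NOP4 },
      { BYTES_NOP5 },
  };

  int main(void)
  {
      for (int len = 1; len <= 5; len++) {
          printf("nop%d:", len);
          for (int i = 0; i < len; i++)
              printf(" %02x", nops[len - 1][i]);
          printf("\n");
      }
      return 0;
  }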

[tip: objtool/urgent] objtool,x86: Fix uaccess PUSHF/POPF validation

2021-03-12 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/urgent branch of tip:

Commit-ID: ba08abca66d46381df60842f64f70099d5482b92
Gitweb:
https://git.kernel.org/tip/ba08abca66d46381df60842f64f70099d5482b92
Author:Peter Zijlstra 
AuthorDate:Mon, 08 Mar 2021 15:46:04 +01:00
Committer: Peter Zijlstra 
CommitterDate: Fri, 12 Mar 2021 09:15:49 +01:00

objtool,x86: Fix uaccess PUSHF/POPF validation

Commit ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not
use popf") replaced "push %reg; popf" with something like: "test
$0x200, %reg; jz 1f; sti; 1:", which breaks the pushf/popf symmetry
that commit ea24213d8088 ("objtool: Add UACCESS validation") relies
on.

The result is:

  drivers/gpu/drm/amd/amdgpu/si.o: warning: objtool: si_common_hw_init()+0xf36: PUSHF stack exhausted

Meanwhile, commit c9c324dc22aa ("objtool: Support stack layout changes
in alternatives") makes that we can actually use stack-ops in
alternatives, which means we can revert 1ff865e343c2 ("x86,smap: Fix
smap_{save,restore}() alternatives").

That in turn means we can limit the PUSHF/POPF handling of
ea24213d8088 to those instructions that are in alternatives.

Fixes: ab234a260b1f ("x86/pv: Rework arch_local_irq_restore() to not use popf")
Reported-by: Borislav Petkov 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/yey4ribqya5fn...@hirez.programming.kicks-ass.net
---
 arch/x86/include/asm/smap.h | 10 --
 tools/objtool/check.c   |  3 +++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8b58d69..0bc9b08 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -58,9 +58,8 @@ static __always_inline unsigned long smap_save(void)
unsigned long flags;
 
asm volatile ("# smap_save\n\t"
- ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
- "pushf; pop %0; " __ASM_CLAC "\n\t"
- "1:"
+ ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t",
+ X86_FEATURE_SMAP)
  : "=rm" (flags) : : "memory", "cc");
 
return flags;
@@ -69,9 +68,8 @@ static __always_inline unsigned long smap_save(void)
 static __always_inline void smap_restore(unsigned long flags)
 {
asm volatile ("# smap_restore\n\t"
- ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
- "push %0; popf\n\t"
- "1:"
+ ALTERNATIVE("", "push %0; popf\n\t",
+ X86_FEATURE_SMAP)
  : : "g" (flags) : "memory", "cc");
 }
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 068cdb4..5e5388a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2442,6 +2442,9 @@ static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
	if (update_cfi_state(insn, &state->cfi, op))
return 1;
 
+   if (!insn->alt_group)
+   continue;
+
if (op->dest.type == OP_DEST_PUSHF) {
if (!state->uaccess_stack) {
state->uaccess_stack = 1;


[tip: locking/urgent] u64_stats,lockdep: Fix u64_stats_init() vs lockdep

2021-03-10 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: d5b0e0677bfd5efd17c5bbb00156931f0d41cb85
Gitweb:
https://git.kernel.org/tip/d5b0e0677bfd5efd17c5bbb00156931f0d41cb85
Author:Peter Zijlstra 
AuthorDate:Mon, 08 Mar 2021 09:38:12 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 10 Mar 2021 09:51:45 +01:00

u64_stats,lockdep: Fix u64_stats_init() vs lockdep

Jakub reported that:

static struct net_device *rtl8139_init_board(struct pci_dev *pdev)
{
...
u64_stats_init(&tp->rx_stats.syncp);
u64_stats_init(&tp->tx_stats.syncp);
...
}

results in lockdep getting confused between the RX and TX stats lock.
This is because u64_stats_init() is an inline calling seqcount_init(),
which is a macro using a static variable to generate a lockdep class.

By wrapping that in an inline, we negate the effect of the macro and
fold the static key variable, hence the confusion.

Fix by also making u64_stats_init() a macro for the case where it
matters, leaving the other case an inline for argument validation
etc.

Reported-by: Jakub Kicinski 
Debugged-by: "Ahmed S. Darwish" 
Signed-off-by: Peter Zijlstra (Intel) 
Tested-by: "Erhard F." 
Link: https://lkml.kernel.org/r/yexicy6+9mksd...@hirez.programming.kicks-ass.net
---
 include/linux/u64_stats_sync.h | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index c6abb79..e81856c 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p)
 }
 #endif
 
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define u64_stats_init(syncp)  seqcount_init(&(syncp)->seq)
+#else
 static inline void u64_stats_init(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
-   seqcount_init(&syncp->seq);
-#endif
 }
+#endif
 
 static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
 {

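The underlying mechanism is plain C rather than anything lockdep-specific: a macro that declares a static variable mints one variable per expansion site, while routing every caller through a single inline leaves exactly one expansion, hence one shared lockdep class. A small GNU C demonstration (statement expressions, as the kernel uses; hypothetical names):

  #include <stdio.h>

  /* per-expansion static, like the lockdep key inside seqcount_init() */
  #define CLASS_KEY() ({ static int __key; &__key; })

  static inline int *key_via_inline(void)
  {
      return CLASS_KEY();     /* expanded once: one key for all callers */
  }

  int main(void)
  {
      /* two expansion sites, two distinct keys */
      printf("macro:  %p %p\n", (void *)CLASS_KEY(), (void *)CLASS_KEY());
      /* one expansion site, the same key twice */
      printf("inline: %p %p\n", (void *)key_via_inline(),
             (void *)key_via_inline());
      return 0;
  }

Hence the fix keeps u64_stats_init() a macro exactly in the configuration where the per-site static matters.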

[tip: locking/urgent] seqlock,lockdep: Fix seqcount_latch_init()

2021-03-10 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: 4817a52b306136c8b2b2271d8770401441e4cf79
Gitweb:
https://git.kernel.org/tip/4817a52b306136c8b2b2271d8770401441e4cf79
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Mar 2021 15:21:18 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 10 Mar 2021 09:51:45 +01:00

seqlock,lockdep: Fix seqcount_latch_init()

seqcount_init() must be a macro in order to preserve the static
variable that is used for the lockdep key. Don't then wrap it in an
inline function, which destroys that.

Luckily there aren't many users of this function, but fix it before it
becomes a problem.

Fixes: 80793c3471d9 ("seqlock: Introduce seqcount_latch_t")
Reported-by: Eric Dumazet 
Signed-off-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/yeefebnuvkzax...@hirez.programming.kicks-ass.net
---
 include/linux/seqlock.h | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 2f7bb92..f61e34f 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -664,10 +664,7 @@ typedef struct {
  * seqcount_latch_init() - runtime initializer for seqcount_latch_t
  * @s: Pointer to the seqcount_latch_t instance
  */
-static inline void seqcount_latch_init(seqcount_latch_t *s)
-{
-   seqcount_init(&s->seqcount);
-}
+#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount)
 
 /**
  * raw_read_seqcount_latch() - pick even/odd latch data copy


[tip: x86/mm] smp: Micro-optimize smp_call_function_many_cond()

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/mm branch of tip:

Commit-ID: d43f17a1da25373580ebb466de7d0641acbf6fd6
Gitweb:
https://git.kernel.org/tip/d43f17a1da25373580ebb466de7d0641acbf6fd6
Author:Peter Zijlstra 
AuthorDate:Tue, 02 Mar 2021 08:02:43 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 13:00:22 +01:00

smp: Micro-optimize smp_call_function_many_cond()

Call the generic send_call_function_single_ipi() function, which
will avoid the IPI when @last_cpu is idle.

Signed-off-by: Peter Zijlstra 
Signed-off-by: Ingo Molnar 
Cc: linux-kernel@vger.kernel.org
---
 kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index b6375d7..af0d51d 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -694,7 +694,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 * provided mask.
 */
if (nr_cpus == 1)
-   arch_send_call_function_single_ipi(last_cpu);
+   send_call_function_single_ipi(last_cpu);
else if (likely(nr_cpus > 1))
arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
}


[tip: locking/core] static_call: Fix the module key fixup

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/core branch of tip:

Commit-ID: 50bf8080a94d171e843fc013abec19d8ab9f50ae
Gitweb:
https://git.kernel.org/tip/50bf8080a94d171e843fc013abec19d8ab9f50ae
Author:Peter Zijlstra 
AuthorDate:Thu, 25 Feb 2021 23:03:51 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:49:08 +01:00

static_call: Fix the module key fixup

Provided the target address of a R_X86_64_PC32 relocation is aligned,
the low two bits should be invariant between the relative and absolute
value.

Turns out the address is not aligned and things go sideways, ensure we
transfer the bits in the absolute form when fixing up the key address.

Fixes: 73f44fe19d35 ("static_call: Allow module use without exposing static_call_key")
Reported-by: Steven Rostedt 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Tested-by: Steven Rostedt (VMware) 
Link: https://lkml.kernel.org/r/20210225220351.ge4...@worktop.programming.kicks-ass.net
---
 kernel/static_call.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/static_call.c b/kernel/static_call.c
index 6906c6e..ae82529 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -349,7 +349,8 @@ static int static_call_add_module(struct module *mod)
struct static_call_site *site;
 
for (site = start; site != stop; site++) {
-   unsigned long addr = (unsigned long)static_call_key(site);
+   unsigned long s_key = (long)site->key + (long)&site->key;
+   unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
unsigned long key;
 
/*
@@ -373,8 +374,8 @@ static int static_call_add_module(struct module *mod)
return -EINVAL;
}
 
-   site->key = (key - (long)&site->key) |
-   (site->key & STATIC_CALL_SITE_FLAGS);
+   key |= s_key & STATIC_CALL_SITE_FLAGS;
+   site->key = key - (long)&site->key;
}
 
return __static_call_init(mod, start, stop);

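The changelog's alignment caveat can be checked with two lines of arithmetic: a PC-relative value (t - p) and the absolute address t agree in their low two bits only when the base p is 4-byte aligned, so flag bits stashed in a pointer survive the relative form only under that assumption. A tiny demo with made-up addresses:

  #include <stdio.h>

  int main(void)
  {
      long t = 0x1001;                    /* target with flag bit 0 set */
      long p_aligned   = 0x2000;          /* 4-byte aligned site */
      long p_unaligned = 0x2002;          /* misaligned site */

      /* low bits of the relative form vs. the absolute form */
      printf("aligned:   rel&3=%ld abs&3=%ld\n", (t - p_aligned) & 3, t & 3);
      printf("unaligned: rel&3=%ld abs&3=%ld\n", (t - p_unaligned) & 3, t & 3);
      return 0;
  }

The first line prints matching low bits, the second does not, which is why the fix above transfers the flags from the absolute form instead of assuming the invariance.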

[tip: objtool/core] objtool,x86: More ModRM sugar

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 36d92e43d01cbeeec99abdf405362243051d6b3f
Gitweb:
https://git.kernel.org/tip/36d92e43d01cbeeec99abdf405362243051d6b3f
Author:Peter Zijlstra 
AuthorDate:Fri, 12 Feb 2021 09:13:00 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool,x86: More ModRM sugar

Better helpers to decode ModRM.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/YCZB/ljatfxqq...@hirez.programming.kicks-ass.net
---
 tools/objtool/arch/x86/decode.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index b42e5ec..431bafb 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -82,15 +82,21 @@ unsigned long arch_jump_destination(struct instruction *insn)
  * 01 |  [r/m + d8]|[S+d]|   [r/m + d8]  |
  * 10 |  [r/m + d32]   |[S+D]|   [r/m + d32] |
  * 11 |   r/ m   |
- *
  */
+
+#define mod_is_mem()   (modrm_mod != 3)
+#define mod_is_reg()   (modrm_mod == 3)
+
 #define is_RIP()   ((modrm_rm & 7) == CFI_BP && modrm_mod == 0)
-#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3)
+#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem())
 
 #define rm_is(reg) (have_SIB() ? \
sib_base == (reg) && sib_index == CFI_SP : \
modrm_rm == (reg))
 
+#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg))
+#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg))
+
 int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
@@ -154,7 +160,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
case 0x1:
case 0x29:
-   if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) {
+   if (rex_w && rm_is_reg(CFI_SP)) {
 
/* add/sub reg, %rsp */
ADD_OP(op) {
@@ -219,7 +225,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
 
/* %rsp target only */
-   if (!(modrm_mod == 3 && modrm_rm == CFI_SP))
+   if (!rm_is_reg(CFI_SP))
break;
 
imm = insn.immediate.value;
@@ -272,7 +278,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
if (modrm_reg == CFI_SP) {
 
-   if (modrm_mod == 3) {
+   if (mod_is_reg()) {
/* mov %rsp, reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
@@ -308,7 +314,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
}
 
-   if (modrm_mod == 3 && modrm_rm == CFI_SP) {
+   if (rm_is_reg(CFI_SP)) {
 
/* mov reg, %rsp */
ADD_OP(op) {
@@ -325,7 +331,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
if (!rex_w)
break;
 
-   if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) {
+   if (rm_is_mem(CFI_BP)) {
 
/* mov reg, disp(%rbp) */
ADD_OP(op) {
@@ -338,7 +344,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
}
 
-   if (modrm_mod != 3 && rm_is(CFI_SP)) {
+   if (rm_is_mem(CFI_SP)) {
 
/* mov reg, disp(%rsp) */
ADD_OP(op) {
@@ -357,7 +363,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
if (!rex_w)
break;
 
-   if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) {
+   if (rm_is_mem(CFI_BP)) {
 
/* mov disp(%rbp), reg */
ADD_OP(op) {
@@ -370,7 +376,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
}
 
-   if (modrm_mod != 3 && rm_is(CFI_SP)) {
+   if (rm_is_mem(CFI_SP)) {
 
/* mov disp(%rsp), reg */
ADD_OP(op) {
@@ -386,7 +392,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
 
case 0x8d:
-   if (modrm_mod == 3) {
+   if (mod_is_reg()) {
  
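For readers following the decode helpers, the raw field extraction underneath is just bit slicing, with a REX prefix bit extending reg and r/m to four bits each. A stand-alone sketch decoding the ModRM byte of "mov %rsp, %rbp" (48 89 e5):

  #include <stdio.h>

  int main(void)
  {
      unsigned char modrm = 0xe5;  /* from "mov %rsp, %rbp" = 48 89 e5 */
      int rex_r = 0, rex_b = 0;    /* REX.R / REX.B extension bits */

      int mod = modrm >> 6;
      int reg = ((modrm >> 3) & 7) + 8 * rex_r;
      int rm  = (modrm & 7) + 8 * rex_b;

      /* mod == 3 is the register form; here reg=4 (SP), rm=5 (BP) */
      printf("mod=%d reg=%d rm=%d (%s)\n", mod, reg, rm,
             mod == 3 ? "register form" : "memory form");
      return 0;
  }

With these fields in hand, mod_is_reg()/rm_is_mem() above are one-line predicates over mod and rm.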

[tip: objtool/core] objtool: Collate parse_options() users

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: a2f605f9ff57397d05a8e2f282b78a69f574d305
Gitweb:
https://git.kernel.org/tip/a2f605f9ff57397d05a8e2f282b78a69f574d305
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Feb 2021 11:18:24 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool: Collate parse_options() users

Ensure there's a single place that parses check_options, in
preparation for extending where to get options from.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/20210226110004.193108...@infradead.org
---
 tools/objtool/builtin-check.c   | 14 +-
 tools/objtool/builtin-orc.c |  5 +
 tools/objtool/include/objtool/builtin.h |  2 ++
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 97f063d..0399752 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -42,17 +42,21 @@ const struct option check_options[] = {
OPT_END(),
 };
 
+int cmd_parse_options(int argc, const char **argv, const char * const usage[])
+{
+   argc = parse_options(argc, argv, check_options, usage, 0);
+   if (argc != 1)
+   usage_with_options(usage, check_options);
+   return argc;
+}
+
 int cmd_check(int argc, const char **argv)
 {
const char *objname;
struct objtool_file *file;
int ret;
 
-   argc = parse_options(argc, argv, check_options, check_usage, 0);
-
-   if (argc != 1)
-   usage_with_options(check_usage, check_options);
-
+   argc = cmd_parse_options(argc, argv, check_usage);
objname = argv[0];
 
file = objtool_open_read(objname);
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 8273bbf..17f8b93 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv)
struct objtool_file *file;
int ret;
 
-   argc = parse_options(argc, argv, check_options, orc_usage, 0);
-   if (argc != 1)
-   usage_with_options(orc_usage, check_options);
-
+   argc = cmd_parse_options(argc, argv, orc_usage);
objname = argv[0];
 
file = objtool_open_read(objname);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index d019210..15ac0b7 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -11,6 +11,8 @@ extern const struct option check_options[];
 extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
 validate_dup, vmlinux, mcount, noinstr, backup;
 
+extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
+
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
 


[tip: objtool/core] objtool: Parse options from OBJTOOL_ARGS

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 900b4df347bbac4874149a226143a556909faba8
Gitweb:
https://git.kernel.org/tip/900b4df347bbac4874149a226143a556909faba8
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Feb 2021 11:32:30 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool: Parse options from OBJTOOL_ARGS

Teach objtool to parse options from the OBJTOOL_ARGS environment
variable.

This enables things like:

  $ OBJTOOL_ARGS="--backup" make O=defconfig-build/ kernel/ponies.o

to obtain both defconfig-build/kernel/ponies.o{,.orig} and easily
inspect what objtool actually did.

Suggested-by: Borislav Petkov 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/20210226110004.252553...@infradead.org
---
 tools/objtool/builtin-check.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 0399752..8b38b5d 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -15,6 +15,7 @@
 
 #include <subcmd/parse-options.h>
 #include <string.h>
+#include <stdlib.h>
 #include <objtool/builtin.h>
 #include <objtool/objtool.h>
 
@@ -26,6 +27,11 @@ static const char * const check_usage[] = {
NULL,
 };
 
+static const char * const env_usage[] = {
+   "OBJTOOL_ARGS=\"\"",
+   NULL,
+};
+
 const struct option check_options[] = {
OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
@@ -44,6 +50,25 @@ const struct option check_options[] = {
 
 int cmd_parse_options(int argc, const char **argv, const char * const usage[])
 {
+   const char *envv[16] = { };
+   char *env;
+   int envc;
+
+   env = getenv("OBJTOOL_ARGS");
+   if (env) {
+   envv[0] = "OBJTOOL_ARGS";
+   for (envc = 1; envc < ARRAY_SIZE(envv); ) {
+   envv[envc++] = env;
+   env = strchr(env, ' ');
+   if (!env)
+   break;
+   *env = '\0';
+   env++;
+   }
+
+   parse_options(envc, envv, check_options, env_usage, 0);
+   }
+
argc = parse_options(argc, argv, check_options, usage, 0);
if (argc != 1)
usage_with_options(usage, check_options);


[tip: objtool/core] objtool,x86: Rewrite LEA decode

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 2ee0c363492f1acc1082125218e6a80c0d7d502b
Gitweb:
https://git.kernel.org/tip/2ee0c363492f1acc1082125218e6a80c0d7d502b
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 21:29:16 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool,x86: Rewrite LEA decode

Current LEA decoding is a bunch of special cases, properly decode the
instruction, with exception of full SIB and RIP-relative modes.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.143250...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 86 ++--
 1 file changed, 28 insertions(+), 58 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 549813c..d8f0138 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -91,9 +91,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 {
struct insn insn;
int x86_64, sign;
-   unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
- rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
- modrm_reg = 0, sib = 0;
+   unsigned char op1, op2,
+ rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
+ modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+ sib = 0;
struct stack_op *op = NULL;
struct symbol *sym;
 
@@ -328,68 +329,37 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
 
case 0x8d:
-   if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
-
-   ADD_OP(op) {
-   if (!insn.displacement.value) {
-   /* lea (%rsp), reg */
-   op->src.type = OP_SRC_REG;
-   } else {
-   /* lea disp(%rsp), reg */
-   op->src.type = OP_SRC_ADD;
-   op->src.offset = insn.displacement.value;
-   }
-   op->src.reg = CFI_SP;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
-   }
-
-   } else if (rex == 0x48 && modrm == 0x65) {
-
-   /* lea disp(%rbp), %rsp */
-   ADD_OP(op) {
-   op->src.type = OP_SRC_ADD;
-   op->src.reg = CFI_BP;
-   op->src.offset = insn.displacement.value;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = CFI_SP;
-   }
+   if (modrm_mod == 3) {
+   WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset);
+   break;
+   }
 
-   } else if (rex == 0x49 && modrm == 0x62 &&
-  insn.displacement.value == -8) {
+   /* skip non 64bit ops */
+   if (!rex_w)
+   break;
 
-   /*
-* lea -0x8(%r10), %rsp
-*
-* Restoring rsp back to its original value after a
-* stack realignment.
-*/
-   ADD_OP(op) {
-   op->src.type = OP_SRC_ADD;
-   op->src.reg = CFI_R10;
-   op->src.offset = -8;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = CFI_SP;
-   }
+   /* skip nontrivial SIB */
+   if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
+   break;
 
-   } else if (rex == 0x49 && modrm == 0x65 &&
-  insn.displacement.value == -16) {
+   /* skip RIP relative displacement */
+   if (modrm_rm == 5 && modrm_mod == 0)
+   break;
 
-   /*
-* lea -0x10(%r13), %rsp
-*
-* Restoring rsp back to its original value after a
-* stack realignment.
-*/
-   ADD_OP(op) {
+   /* lea disp(%src), %dst */
+   ADD_OP(op) {
+   op->src.offset = insn.displacement.value;
+   if (!op->src.offset) {
+

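The rewrite leans on LEA being pure address arithmetic: "lea disp(%src), %dst" computes dst = src + disp without touching memory, so the stack tracker can treat it like an add. A quick demonstration (x86-64 GNU C inline asm, assumed toolchain):

  #include <stdio.h>

  int main(void)
  {
      unsigned long src = 1000, dst;

      /* lea 16(%src), %dst is just dst = src + 16; no memory access */
      asm ("lea 16(%1), %0" : "=r" (dst) : "r" (src));

      printf("%lu\n", dst);  /* prints 1016 */
      return 0;
  }
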
[tip: objtool/core] objtool,x86: Rewrite LEAVE

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: ffc7e74f36a2c7424da262a32a0bbe59669677ef
Gitweb:
https://git.kernel.org/tip/ffc7e74f36a2c7424da262a32a0bbe59669677ef
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 21:41:13 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool,x86: Rewrite LEAVE

Since we can now have multiple stack-ops per instruction, we don't
need to special case LEAVE and can simply emit the composite
operations.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.253273...@infradead.org
---
 tools/objtool/arch/x86/decode.c  | 14 +++---
 tools/objtool/check.c| 24 ++--
 tools/objtool/include/objtool/arch.h |  1 -
 3 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index d8f0138..47b9acf 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -446,9 +446,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 * mov bp, sp
 * pop bp
 */
-   ADD_OP(op)
-   op->dest.type = OP_DEST_LEAVE;
-
+   ADD_OP(op) {
+   op->src.type = OP_SRC_REG;
+   op->src.reg = CFI_BP;
+   op->dest.type = OP_DEST_REG;
+   op->dest.reg = CFI_SP;
+   }
+   ADD_OP(op) {
+   op->src.type = OP_SRC_POP;
+   op->dest.type = OP_DEST_REG;
+   op->dest.reg = CFI_BP;
+   }
break;
 
case 0xe3:
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 12b8f0f..a0f762a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2020,7 +2020,7 @@ static int update_cfi_state(struct instruction *insn,
}
 
else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
-cfa->base == CFI_BP) {
+(cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) {
 
/*
 * mov %rbp, %rsp
@@ -2217,7 +2217,7 @@ static int update_cfi_state(struct instruction *insn,
cfa->offset = 0;
cfi->drap_offset = -1;
 
-   } else if (regs[op->dest.reg].offset == -cfi->stack_size) {
+   } else if (cfi->stack_size == -regs[op->dest.reg].offset) {
 
/* pop %reg */
restore_reg(cfi, op->dest.reg);
@@ -2358,26 +2358,6 @@ static int update_cfi_state(struct instruction *insn,
 
break;
 
-   case OP_DEST_LEAVE:
-   if ((!cfi->drap && cfa->base != CFI_BP) ||
-   (cfi->drap && cfa->base != cfi->drap_reg)) {
-   WARN_FUNC("leave instruction with modified stack frame",
- insn->sec, insn->offset);
-   return -1;
-   }
-
-   /* leave (mov %rbp, %rsp; pop %rbp) */
-
-   cfi->stack_size = -cfi->regs[CFI_BP].offset - 8;
-   restore_reg(cfi, CFI_BP);
-
-   if (!cfi->drap) {
-   cfa->base = CFI_SP;
-   cfa->offset -= 8;
-   }
-
-   break;
-
case OP_DEST_MEM:
if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) {
WARN_FUNC("unknown stack-related memory operation",
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 6ff0685..ff21f38 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -35,7 +35,6 @@ enum op_dest_type {
OP_DEST_MEM,
OP_DEST_PUSH,
OP_DEST_PUSHF,
-   OP_DEST_LEAVE,
 };
 
 struct op_dest {


[tip: objtool/core] objtool,x86: Renumber CFI_reg

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: d473b18b2ef62563fb874f9cae6e123f99129e3f
Gitweb:
https://git.kernel.org/tip/d473b18b2ef62563fb874f9cae6e123f99129e3f
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 20:18:21 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:22 +01:00

objtool,x86: Renumber CFI_reg

Make them match the instruction encoding numbering.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.033720...@infradead.org
---
 tools/objtool/arch/x86/include/arch/cfi_regs.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h
index 79bc517..0579d22 100644
--- a/tools/objtool/arch/x86/include/arch/cfi_regs.h
+++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h
@@ -4,13 +4,13 @@
 #define _OBJTOOL_CFI_REGS_H
 
 #define CFI_AX 0
-#define CFI_DX 1
-#define CFI_CX 2
+#define CFI_CX 1
+#define CFI_DX 2
 #define CFI_BX 3
-#define CFI_SI 4
-#define CFI_DI 5
-#define CFI_BP 6
-#define CFI_SP 7
+#define CFI_SP 4
+#define CFI_BP 5
+#define CFI_SI 6
+#define CFI_DI 7
 #define CFI_R8 8
 #define CFI_R9 9
 #define CFI_R10 10

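The payoff of matching the instruction encoding: single-byte push and pop opcodes are 0x50 and 0x58 plus the register number, so with this table the decoder can use the low three opcode bits (plus REX.B for r8-r15) as a CFI register directly. A stand-alone check:

  #include <stdio.h>

  #define CFI_BP 5  /* new numbering, as in the hunk above */

  int main(void)
  {
      unsigned char op = 0x55;  /* push %rbp */

      /* low three opcode bits now equal the CFI register number */
      printf("push opcode %#x -> reg %d (CFI_BP = %d)\n",
             op, op & 0x7, CFI_BP);
      return 0;
  }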

[tip: objtool/core] objtool: Allow UNWIND_HINT to suppress dodgy stack modifications

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: d54dba41999498b38a40940e1123019d50b26496
Gitweb:
https://git.kernel.org/tip/d54dba41999498b38a40940e1123019d50b26496
Author:Peter Zijlstra 
AuthorDate:Thu, 11 Feb 2021 13:03:28 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:22 +01:00

objtool: Allow UNWIND_HINT to suppress dodgy stack modifications

rewind_stack_do_exit()
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */

xorl    %ebp, %ebp
movq    PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq    -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS

call    do_exit

Does unspeakable things to the stack, which objtool currently fails to
detect due to a limitation in instruction decoding. This will be
rectified after which the above will result in:

arch/x86/entry/entry_64.o: warning: objtool: .text+0xab: unsupported stack register modification

Allow the UNWIND_HINT on the next instruction to suppress this, it
will overwrite the state anyway.

Suggested-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173626.918498...@infradead.org
---
 tools/objtool/check.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 068cdb4..12b8f0f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1959,8 +1959,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg)
  *   41 5d pop%r13
  *   c3retq
  */
-static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
-struct stack_op *op)
+static int update_cfi_state(struct instruction *insn,
+   struct instruction *next_insn,
+   struct cfi_state *cfi, struct stack_op *op)
 {
struct cfi_reg *cfa = >cfa;
struct cfi_reg *regs = cfi->regs;
@@ -2161,7 +2162,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
}
 
-   if (op->dest.reg == cfi->cfa.base) {
+   if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) {
WARN_FUNC("unsupported stack register modification",
  insn->sec, insn->offset);
return -1;
@@ -2433,13 +2434,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
return 0;
 }
 
-static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+static int handle_insn_ops(struct instruction *insn,
+  struct instruction *next_insn,
+  struct insn_state *state)
 {
struct stack_op *op;
 
list_for_each_entry(op, &insn->stack_ops, list) {
 
-   if (update_cfi_state(insn, &state->cfi, op))
+   if (update_cfi_state(insn, next_insn, &state->cfi, op))
return 1;
 
if (op->dest.type == OP_DEST_PUSHF) {
@@ -2719,7 +2722,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
}
 
-   if (handle_insn_ops(insn, &state))
+   if (handle_insn_ops(insn, next_insn, &state))
return 1;
 
switch (insn->type) {


[tip: objtool/core] objtool,x86: Simplify register decode

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 16ef7f159c503c7befec7018ee0e82fdc311721e
Gitweb:
https://git.kernel.org/tip/16ef7f159c503c7befec7018ee0e82fdc311721e
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 19:59:43 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool,x86: Simplify register decode

Since the CFI_reg number now matches the instruction encoding order do
away with the op_to_cfi_reg[] and use direct assignment.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.362004...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 79 +++-
 1 file changed, 39 insertions(+), 40 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 47b9acf..5ce7dc4 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -17,17 +17,6 @@
 #include 
 #include 
 
-static unsigned char op_to_cfi_reg[][2] = {
-   {CFI_AX, CFI_R8},
-   {CFI_CX, CFI_R9},
-   {CFI_DX, CFI_R10},
-   {CFI_BX, CFI_R11},
-   {CFI_SP, CFI_R12},
-   {CFI_BP, CFI_R13},
-   {CFI_SI, CFI_R14},
-   {CFI_DI, CFI_R15},
-};
-
 static int is_x86_64(const struct elf *elf)
 {
switch (elf->ehdr.e_machine) {
@@ -94,7 +83,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned char op1, op2,
  rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
  modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
- sib = 0;
+ sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */;
struct stack_op *op = NULL;
struct symbol *sym;
 
@@ -130,23 +119,29 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
if (insn.modrm.nbytes) {
modrm = insn.modrm.bytes[0];
modrm_mod = X86_MODRM_MOD(modrm);
-   modrm_reg = X86_MODRM_REG(modrm);
-   modrm_rm = X86_MODRM_RM(modrm);
+   modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
+   modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
}
 
-   if (insn.sib.nbytes)
+   if (insn.sib.nbytes) {
sib = insn.sib.bytes[0];
+   /*
+   sib_scale = X86_SIB_SCALE(sib);
+   sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
+   sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
+*/
+   }
 
switch (op1) {
 
case 0x1:
case 0x29:
-   if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+   if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) {
 
/* add/sub reg, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_ADD;
-   op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+   op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG;
op->dest.reg = CFI_SP;
}
@@ -158,7 +153,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
/* push reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
-   op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+   op->src.reg = (op1 & 0x7) + 8*rex_b;
op->dest.type = OP_DEST_PUSH;
}
 
@@ -170,7 +165,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
ADD_OP(op) {
op->src.type = OP_SRC_POP;
op->dest.type = OP_DEST_REG;
-   op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+   op->dest.reg = (op1 & 0x7) + 8*rex_b;
}
 
break;
@@ -223,7 +218,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
 
case 0x89:
-   if (rex_w && !rex_r && modrm_reg == 4) {
+   if (rex_w && modrm_reg == CFI_SP) {
 
if (modrm_mod == 3) {
/* mov %rsp, reg */
@@ -231,17 +226,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG;
-   op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+   op->dest.reg = modrm_rm;
}

[tip: objtool/core] objtool,x86: Rewrite ADD/SUB/AND

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 961d83b9073b1ce5834af50d3c69e5e2461c6fd3
Gitweb:
https://git.kernel.org/tip/961d83b9073b1ce5834af50d3c69e5e2461c6fd3
Author:Peter Zijlstra 
AuthorDate:Wed, 10 Feb 2021 14:11:30 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool,x86: Rewrite ADD/SUB/AND

Support sign extending and imm8 forms.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.588366...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 70 +++-
 1 file changed, 51 insertions(+), 19 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 78ae5be..b42e5ec 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -98,13 +98,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
struct list_head *ops_list)
 {
struct insn insn;
-   int x86_64, sign;
+   int x86_64;
unsigned char op1, op2,
  rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
  modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
  sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
struct stack_op *op = NULL;
struct symbol *sym;
+   u64 imm;
 
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
@@ -200,12 +201,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
*type = INSN_JUMP_CONDITIONAL;
break;
 
-   case 0x81:
-   case 0x83:
-   if (rex != 0x48)
+   case 0x80 ... 0x83:
+   /*
+* 1000 00sw : mod OP r/m : immediate
+*
+* s - sign extend immediate
+* w - imm8 / imm32
+*
+* OP: 000 ADD    100 AND
+*     001 OR     101 SUB
+*     010 ADC    110 XOR
+*     011 SBB    111 CMP
+*/
+
+   /* 64bit only */
+   if (!rex_w)
break;
 
-   if (modrm == 0xe4) {
+   /* %rsp target only */
+   if (!(modrm_mod == 3 && modrm_rm == CFI_SP))
+   break;
+
+   imm = insn.immediate.value;
+   if (op1 & 2) { /* sign extend */
+   if (op1 & 1) { /* imm32 */
+   imm <<= 32;
+   imm = (s64)imm >> 32;
+   } else { /* imm8 */
+   imm <<= 56;
+   imm = (s64)imm >> 56;
+   }
+   }
+
+   switch (modrm_reg & 7) {
+   case 5:
+   imm = -imm;
+   /* fallthrough */
+   case 0:
+   /* add/sub imm, %rsp */
+   ADD_OP(op) {
+   op->src.type = OP_SRC_ADD;
+   op->src.reg = CFI_SP;
+   op->src.offset = imm;
+   op->dest.type = OP_DEST_REG;
+   op->dest.reg = CFI_SP;
+   }
+   break;
+
+   case 4:
/* and imm, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_AND;
@@ -215,23 +258,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->dest.reg = CFI_SP;
}
break;
-   }
 
-   if (modrm == 0xc4)
-   sign = 1;
-   else if (modrm == 0xec)
-   sign = -1;
-   else
+   default:
+   /* WARN ? */
break;
-
-   /* add/sub imm, %rsp */
-   ADD_OP(op) {
-   op->src.type = OP_SRC_ADD;
-   op->src.reg = CFI_SP;
-   op->src.offset = insn.immediate.value * sign;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = CFI_SP;
}
+
break;
 
case 0x89:

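The shift pair used for the immediates above is the classic trick for sign-extending a narrow field: move its sign bit up to bit 63, then shift back arithmetically. A stand-alone demo (assumes arithmetic right shift of signed values, as the kernel code does):

  #include <stdio.h>

  int main(void)
  {
      unsigned long long raw = 0xf8;      /* imm8 encoding of -8 */
      long long imm;

      imm = (long long)(raw << 56);       /* park the sign bit at bit 63 */
      imm >>= 56;                         /* arithmetic shift extends it */

      printf("0x%02llx sign-extends to %lld\n", raw, imm);  /* -8 */
      return 0;
  }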

[tip: objtool/core] objtool,x86: Support %riz encodings

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 78df6245c3c82484200b9f8e306dc86fb19e9c02
Gitweb:
https://git.kernel.org/tip/78df6245c3c82484200b9f8e306dc86fb19e9c02
Author:Peter Zijlstra 
AuthorDate:Wed, 10 Feb 2021 11:47:35 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool,x86: Support %riz encodings

When there's a SIB byte, the register otherwise denoted by r/m will
then be denoted by SIB.base; REX.b will now extend this. SIB.index == SP
is magic and notes an index value zero.

This means that there's a bunch of alternative (longer) encodings for
the same thing. Eg. 'ModRM.mod != 3, ModRM.r/m = AX' can be encoded as
'ModRM.mod != 3, ModRM.r/m = SP, SIB.base = AX, SIB.index = SP' which is 
actually 4
different encodings because the value of SIB.scale is irrelevant,
giving rise to 5 different but equal encodings.

Support these encodings and clean up the SIB handling in general.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.472967...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 67 ++--
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 5ce7dc4..78ae5be 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -72,6 +72,25 @@ unsigned long arch_jump_destination(struct instruction *insn)
return -1; \
else for (list_add_tail(>list, ops_list); op; op = NULL)
 
+/*
+ * Helpers to decode ModRM/SIB:
+ *
+ * r/m| AX  CX  DX  BX |  SP |  BP |  SI  DI |
+ *    | R8  R9 R10 R11 | R12 | R13 | R14 R15 |
+ * Mod+----------------+-----+-----+---------+
+ * 00 |    [r/m]       |[SIB]|[IP+]|  [r/m]  |
+ * 01 |  [r/m + d8]    |[S+d]|   [r/m + d8]  |
+ * 10 |  [r/m + d32]   |[S+D]|   [r/m + d32] |
+ * 11 |              r/ m              |
+ *
+ */
+#define is_RIP()   ((modrm_rm & 7) == CFI_BP && modrm_mod == 0)
+#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3)
+
+#define rm_is(reg) (have_SIB() ? \
+   sib_base == (reg) && sib_index == CFI_SP : \
+   modrm_rm == (reg))
+
 int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
@@ -83,7 +102,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned char op1, op2,
  rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
  modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
- sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */;
+ sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
struct stack_op *op = NULL;
struct symbol *sym;
 
@@ -125,11 +144,9 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
 
if (insn.sib.nbytes) {
sib = insn.sib.bytes[0];
-   /*
-   sib_scale = X86_SIB_SCALE(sib);
+   /* sib_scale = X86_SIB_SCALE(sib); */
sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
-*/
}
 
switch (op1) {
@@ -218,7 +235,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
 
case 0x89:
-   if (rex_w && modrm_reg == CFI_SP) {
+   if (!rex_w)
+   break;
+
+   if (modrm_reg == CFI_SP) {
 
if (modrm_mod == 3) {
/* mov %rsp, reg */
@@ -231,14 +251,17 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
 
} else {
-   /* skip nontrivial SIB */
-   if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x))
-   break;
-
/* skip RIP relative displacement */
-   if ((modrm_rm & 7) == 5 && modrm_mod == 0)
+   if (is_RIP())
break;
 
+   /* skip nontrivial SIB */
+   if (have_SIB()) {
+   modrm_rm = sib_base;
+   if (sib_index != CFI_SP)
+   break;
+   }
+
/* mov %rsp, disp(%reg) */
ADD_OP(op) {
   

[tip: objtool/core] objtool: Add --backup

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 8ad15c6900840e8a2163012f4581c52127622e02
Gitweb:
https://git.kernel.org/tip/8ad15c6900840e8a2163012f4581c52127622e02
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Feb 2021 10:59:59 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:44:23 +01:00

objtool: Add --backup

Teach objtool to write backup files, such that it becomes easier to
see what objtool did to the object file.

Backup files will be ${name}.orig.
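
A minimal usage sketch (an illustration, not from the commit; file.o is a
placeholder object):

  $ objtool check --backup file.o
  $ ls file.o*
  file.o  file.o.orig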

Suggested-by: Borislav Petkov 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Borislav Petkov 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/yd4obt3aoxpwl...@hirez.programming.kicks-ass.net
---
 tools/objtool/builtin-check.c   |  4 +-
 tools/objtool/include/objtool/builtin.h |  3 +-
 tools/objtool/objtool.c | 64 -
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c3a85d8..97f063d 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -18,7 +18,8 @@
 #include 
 #include 
 
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+ validate_dup, vmlinux, mcount, noinstr, backup;
 
 static const char * const check_usage[] = {
"objtool check [] file.o",
@@ -37,6 +38,7 @@ const struct option check_options[] = {
OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
+   OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
OPT_END(),
 };
 
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 2502bb2..d019210 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -8,7 +8,8 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+    validate_dup, vmlinux, mcount, noinstr, backup;
 
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 7b97ce4..43c1836 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -44,6 +45,64 @@ bool help;
 const char *objname;
 static struct objtool_file file;
 
+static bool objtool_create_backup(const char *_objname)
+{
+   int len = strlen(_objname);
+   char *buf, *base, *name = malloc(len+6);
+   int s, d, l, t;
+
+   if (!name) {
+   perror("failed backup name malloc");
+   return false;
+   }
+
+   strcpy(name, _objname);
+   strcpy(name + len, ".orig");
+
+   d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+   if (d < 0) {
+   perror("failed to create backup file");
+   return false;
+   }
+
+   s = open(_objname, O_RDONLY);
+   if (s < 0) {
+   perror("failed to open orig file");
+   return false;
+   }
+
+   buf = malloc(4096);
+   if (!buf) {
+   perror("failed backup data malloc");
+   return false;
+   }
+
+   while ((l = read(s, buf, 4096)) > 0) {
+   base = buf;
+   do {
+   t = write(d, base, l);
+   if (t < 0) {
+   perror("failed backup write");
+   return false;
+   }
+   base += t;
+   l -= t;
+   } while (l);
+   }
+
+   if (l < 0) {
+   perror("failed backup read");
+   return false;
+   }
+
+   free(name);
+   free(buf);
+   close(d);
+   close(s);
+
+   return true;
+}
+
 struct objtool_file *objtool_open_read(const char *_objname)
 {
if (objname) {
@@ -59,6 +118,11 @@ struct objtool_file *objtool_open_read(const char *_objname)
if (!file.elf)
return NULL;
 
+   if (backup && !objtool_create_backup(objname)) {
+   WARN("can't create backup file");
+   return NULL;
+   }
+
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.static_call_list);


[tip: sched/core] sched: Simplify set_affinity_pending refcounts

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 50caf9c14b1498c90cf808dbba2ca29bd32ccba4
Gitweb:
https://git.kernel.org/tip/50caf9c14b1498c90cf808dbba2ca29bd32ccba4
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:42:08 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00

sched: Simplify set_affinity_pending refcounts

Now that we have set_affinity_pending::stop_pending to indicate if a
stopper is in progress, and we have the guarantee that if that stopper
exists, it will (eventually) complete our @pending, we can simplify the
refcount scheme by no longer counting the stopper thread.
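
In short, the resulting protocol (an illustrative sketch assembled from the
hunks below, not a quote): @refs only counts waiters, and the last waiter
wakes the owner of the pending structure:

	refcount_set(&my_pending.refs, 1);	/* one reference per waiter */
	...
	wait_for_completion(&pending->done);	/* stopper completes @pending */
	if (refcount_dec_and_test(&pending->refs))
		wake_up_var(&pending->refs);	/* last ref wakes the owner */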

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.724130...@infradead.org
---
 kernel/sched/core.c | 32 
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4e4d100..9819121 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1862,6 +1862,10 @@ struct migration_arg {
struct set_affinity_pending *pending;
 };
 
+/*
+ * @refs: number of wait_for_completion()
+ * @stop_pending: is @stop_work in use
+ */
 struct set_affinity_pending {
refcount_t  refs;
unsigned int stop_pending;
@@ -1997,10 +2001,6 @@ out:
if (complete)
complete_all(&pending->done);
 
-   /* For pending->{arg,stop_work} */
-   if (pending && refcount_dec_and_test(&pending->refs))
-   wake_up_var(&pending->refs);
-
return 0;
 }
 
@@ -2199,12 +2199,16 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
push_task = get_task_struct(p);
}
 
+   /*
+* If there are pending waiters, but no pending stop_work,
+* then complete now.
+*/
pending = p->migration_pending;
-   if (pending) {
-   refcount_inc(&pending->refs);
+   if (pending && !pending->stop_pending) {
p->migration_pending = NULL;
complete = true;
}
+
task_rq_unlock(rq, p, rf);
 
if (push_task) {
@@ -2213,7 +2217,7 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
}
 
if (complete)
-   goto do_complete;
+   complete_all(&pending->done);
 
return 0;
}
@@ -2264,9 +2268,9 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
if (!stop_pending)
pending->stop_pending = true;
 
-   refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
if (flags & SCA_MIGRATE_ENABLE)
p->migration_flags &= ~MDF_PUSH;
+
task_rq_unlock(rq, p, rf);
 
if (!stop_pending) {
@@ -2282,12 +2286,13 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
if (task_on_rq_queued(p))
rq = move_queued_task(rq, rf, p, dest_cpu);
 
-   p->migration_pending = NULL;
-   complete = true;
+   if (!pending->stop_pending) {
+   p->migration_pending = NULL;
+   complete = true;
+   }
}
task_rq_unlock(rq, p, rf);
 
-do_complete:
if (complete)
complete_all(&pending->done);
}
@@ -2295,7 +2300,7 @@ do_complete:
wait_for_completion(&pending->done);
 
if (refcount_dec_and_test(&pending->refs))
-   wake_up_var(&pending->refs);
+   wake_up_var(&pending->refs); /* No UaF, just an address */
 
/*
 * Block the original owner of &pending until all subsequent callers
@@ -2303,6 +2308,9 @@ do_complete:
 */
wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
 
+   /* ARGH */
+   WARN_ON_ONCE(my_pending.stop_pending);
+
return 0;
 }
 


[tip: sched/core] sched: Collate affine_move_task() stoppers

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 58b1a45086b5f80f2b2842aa7ed0da51a64a302b
Gitweb:
https://git.kernel.org/tip/58b1a45086b5f80f2b2842aa7ed0da51a64a302b
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:15:23 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00

sched: Collate affine_move_task() stoppers

The SCA_MIGRATE_ENABLE and task_running() cases are almost identical;
collapse them to avoid further duplication.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.500108...@infradead.org
---
 kernel/sched/core.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 088e8f4..84b657f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2239,30 +2239,23 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
return -EINVAL;
}
 
-   if (flags & SCA_MIGRATE_ENABLE) {
-
-   refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
-   p->migration_flags &= ~MDF_PUSH;
-   task_rq_unlock(rq, p, rf);
-
-   stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
-   &pending->arg, &pending->stop_work);
-
-   return 0;
-   }
-
if (task_running(rq, p) || p->state == TASK_WAKING) {
/*
-* Lessen races (and headaches) by delegating
-* is_migration_disabled(p) checks to the stopper, which will
-* run on the same CPU as said p.
+* MIGRATE_ENABLE gets here because 'p == current', but for
+* anything else we cannot do is_migration_disabled(), punt
+* and have the stopper function handle it all race-free.
 */
+
refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
+   if (flags & SCA_MIGRATE_ENABLE)
+   p->migration_flags &= ~MDF_PUSH;
task_rq_unlock(rq, p, rf);
 
stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
&pending->arg, &pending->stop_work);
 
+   if (flags & SCA_MIGRATE_ENABLE)
+   return 0;
} else {
 
if (!is_migration_disabled(p)) {


[tip: sched/core] sched: Simplify migration_cpu_stop()

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: c20cf065d4a619d394d23290093b1002e27dff86
Gitweb:
https://git.kernel.org/tip/c20cf065d4a619d394d23290093b1002e27dff86
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:50:39 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:40:20 +01:00

sched: Simplify migration_cpu_stop()

When affine_move_task() issues a migration_cpu_stop(), the purpose of
that function is to complete that @pending, not any random other
p->migration_pending that might have gotten installed since.

This realization much simplifies migration_cpu_stop() and allows
further necessary steps to fix all this as it provides the guarantee
that @pending's stopper will complete @pending (and not some random
other @pending).

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.430014...@infradead.org
---
 kernel/sched/core.c | 56 ++--
 1 file changed, 8 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 79ddba5..088e8f4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct rq *rq, struct 
rq_flags *rf,
  */
 static int migration_cpu_stop(void *data)
 {
-   struct set_affinity_pending *pending;
struct migration_arg *arg = data;
+   struct set_affinity_pending *pending = arg->pending;
struct task_struct *p = arg->task;
int dest_cpu = arg->dest_cpu;
struct rq *rq = this_rq();
@@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data)
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
 
-   pending = p->migration_pending;
-   if (pending && !arg->pending) {
-   /*
-* This happens from sched_exec() and migrate_task_to(),
-* neither of them care about pending and just want a task to
-* maybe move about.
-*
-* Even if there is a pending, we can ignore it, since
-* affine_move_task() will have its own stop_work's in flight
-* which will manage the completion.
-*
-* Notably, pending doesn't need to match arg->pending. This can
-* happen when triple concurrent affine_move_task() first sets
-* pending, then clears pending and eventually sets another
-* pending.
-*/
-   pending = NULL;
-   }
-
/*
 * If task_rq(p) != rq, it cannot be migrated here, because we're
 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data)
goto out;
 
if (pending) {
-   p->migration_pending = NULL;
+   if (p->migration_pending == pending)
+   p->migration_pending = NULL;
complete = true;
}
 
-   /* migrate_enable() --  we must not race against SCA */
-   if (dest_cpu < 0) {
-   /*
-* When this was migrate_enable() but we no longer
-* have a @pending, a concurrent SCA 'fixed' things
-* and we should be valid again. Nothing to do.
-*/
-   if (!pending) {
-   WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
-   goto out;
-   }
-
+   if (dest_cpu < 0)
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
-   }
 
if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, dest_cpu);
else
p->wake_cpu = dest_cpu;
 
-   } else if (dest_cpu < 0 || pending) {
+   } else if (pending) {
/*
 * This happens when we get migrated between migrate_enable()'s
 * preempt_enable() and scheduling the stopper task. At that
@@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data)
 * ->pi_lock, so the allowed mask is stable - if it got
 * somewhere allowed, we're done.
 */
-   if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
-   p->migration_pending = NULL;
+   if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+   if (p->migration_pending == pending)
+   p->migration_pending = NULL;
complete = true;
 

[tip: sched/core] sched: Optimize migration_cpu_stop()

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 3f1bc119cd7fc987c8ed25ffb717f99403bb308c
Gitweb:
https://git.kernel.org/tip/3f1bc119cd7fc987c8ed25ffb717f99403bb308c
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:21:35 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00

sched: Optimize migration_cpu_stop()

When the purpose of migration_cpu_stop() is to migrate the task to
'any' valid CPU, don't migrate the task when it's already running on a
valid CPU.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.569238...@infradead.org
---
 kernel/sched/core.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84b657f..ac05afb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1936,14 +1936,25 @@ static int migration_cpu_stop(void *data)
complete = true;
}
 
-   if (dest_cpu < 0)
+   if (dest_cpu < 0) {
+   if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
+   goto out;
+
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
+   }
 
if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, dest_cpu);
else
p->wake_cpu = dest_cpu;
 
+   /*
+* XXX __migrate_task() can fail, at which point we might end
+* up running on a dodgy CPU, AFAICT this can only happen
+* during CPU hotplug, at which point we'll get pushed out
+* anyway, so it's probably not a big deal.
+*/
+
} else if (pending) {
/*
 * This happens when we get migrated between migrate_enable()'s


[tip: sched/core] sched: Fix migration_cpu_stop() requeueing

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 8a6edb5257e2a84720fe78cb179eca58ba76126f
Gitweb:
https://git.kernel.org/tip/8a6edb5257e2a84720fe78cb179eca58ba76126f
Author:Peter Zijlstra 
AuthorDate:Sat, 13 Feb 2021 13:10:35 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:40:20 +01:00

sched: Fix migration_cpu_stop() requeueing

When affine_move_task(p) is called on a running task @p, which is not
otherwise already changing affinity, we'll first set
p->migration_pending and then do:

 stop_one_cpu(cpu_of_rq(rq), migration_cpu_stop, &arg);

This then gets us to migration_cpu_stop() running on the CPU that was
previously running our victim task @p.

If we find that our task is no longer on that runqueue (this can
happen because of a concurrent migration due to load-balance etc.),
then we'll end up at the:

} else if (dest_cpu < 0 || pending) {

branch. Which we'll take because we set pending earlier. Here we first
check if the task @p has already satisfied the affinity constraints,
if so we bail early [A]. Otherwise we'll reissue migration_cpu_stop()
onto the CPU that is now hosting our task @p:

stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
&pending->arg, &pending->stop_work);

Except, we've never initialized pending->arg, which will be all 0s.

This then results in running migration_cpu_stop() on the next CPU with
arg->p == NULL, which gives the by now obvious result of fireworks.

The cure is to change affine_move_task() to always use pending->arg,
furthermore we can use the exact same pattern as the
SCA_MIGRATE_ENABLE case, since we'll block on the pending->done
completion anyway, no point in adding yet another completion in
stop_one_cpu().

This then gives a clear distinction between the two
migration_cpu_stop() use cases:

  - sched_exec() / migrate_task_to() : arg->pending == NULL
  - affine_move_task() : arg->pending != NULL;

And we can have it ignore p->migration_pending when !arg->pending. Any
stop work from sched_exec() / migrate_task_to() is in addition to stop
works from affine_move_task(), which will be sufficient to issue the
completion.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.357743...@infradead.org
---
 kernel/sched/core.c | 39 ---
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ca2bb62..79ddba5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1922,6 +1922,24 @@ static int migration_cpu_stop(void *data)
rq_lock(rq, &rf);
 
pending = p->migration_pending;
+   if (pending && !arg->pending) {
+   /*
+* This happens from sched_exec() and migrate_task_to(),
+* neither of them care about pending and just want a task to
+* maybe move about.
+*
+* Even if there is a pending, we can ignore it, since
+* affine_move_task() will have its own stop_work's in flight
+* which will manage the completion.
+*
+* Notably, pending doesn't need to match arg->pending. This can
+* happen when triple concurrent affine_move_task() first sets
+* pending, then clears pending and eventually sets another
+* pending.
+*/
+   pending = NULL;
+   }
+
/*
 * If task_rq(p) != rq, it cannot be migrated here, because we're
 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -2194,10 +2212,6 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
int dest_cpu, unsigned int flags)
 {
struct set_affinity_pending my_pending = { }, *pending = NULL;
-   struct migration_arg arg = {
-   .task = p,
-   .dest_cpu = dest_cpu,
-   };
bool complete = false;
 
/* Can the task run on the task's current CPU? If so, we're done */
@@ -2235,6 +2249,12 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
/* Install the request */
refcount_set(&my_pending.refs, 1);
init_completion(&my_pending.done);
+   my_pending.arg = (struct migration_arg) {
+   .task = p,
+   .dest_cpu = -1, /* any */
+   .pending = &my_pending,
+   };
+
p->migration_pending = &my_pending;
} else {
pending = p->migration_pending;
@@ -2265,12 +2285,6 @@ 

[tip: sched/core] sched: Fix affine_move_task() self-concurrency

2021-03-06 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 9e81889c7648d48dd5fe13f41cbc99f3c362484a
Gitweb:
https://git.kernel.org/tip/9e81889c7648d48dd5fe13f41cbc99f3c362484a
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:31:09 +01:00
Committer: Ingo Molnar 
CommitterDate: Sat, 06 Mar 2021 12:40:21 +01:00

sched: Fix affine_move_task() self-concurrency

Consider:

   sched_setaffinity(p, X); sched_setaffinity(p, Y);

Then the first will install p->migration_pending = &my_pending; and
issue stop_one_cpu_nowait(pending); and the second one will read
p->migration_pending and _also_ issue: stop_one_cpu_nowait(pending),
the _SAME_ @pending.

This causes stopper list corruption.

Add set_affinity_pending::stop_pending, to indicate if a stopper is in
progress.
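
Roughly, the corrupting interleaving looks like this (an illustrative
sketch, not from the commit message):

  CPU0                                      CPU1
  sched_setaffinity(p, X)
    p->migration_pending = &my_pending
    stop_one_cpu_nowait(&pending->stop_work)
                                            sched_setaffinity(p, Y)
                                              pending = p->migration_pending
                                              stop_one_cpu_nowait(&pending->stop_work)
                                              /* same stop_work queued twice */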

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.649146...@infradead.org
---
 kernel/sched/core.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ac05afb..4e4d100 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1864,6 +1864,7 @@ struct migration_arg {
 
 struct set_affinity_pending {
refcount_t  refs;
+   unsigned int stop_pending;
struct completion   done;
struct cpu_stop_work stop_work;
struct migration_arg arg;
@@ -1982,12 +1983,15 @@ static int migration_cpu_stop(void *data)
 * determine is_migration_disabled() and so have to chase after
 * it.
 */
+   WARN_ON_ONCE(!pending->stop_pending);
task_rq_unlock(rq, p, &rf);
stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
&pending->arg, &pending->stop_work);
return 0;
}
 out:
+   if (pending)
+   pending->stop_pending = false;
task_rq_unlock(rq, p, &rf);
 
if (complete)
@@ -2183,7 +2187,7 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
int dest_cpu, unsigned int flags)
 {
struct set_affinity_pending my_pending = { }, *pending = NULL;
-   bool complete = false;
+   bool stop_pending, complete = false;
 
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
@@ -2256,14 +2260,19 @@ static int affine_move_task(struct rq *rq, struct 
task_struct *p, struct rq_flag
 * anything else we cannot do is_migration_disabled(), punt
 * and have the stopper function handle it all race-free.
 */
+   stop_pending = pending->stop_pending;
+   if (!stop_pending)
+   pending->stop_pending = true;
 
refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
if (flags & SCA_MIGRATE_ENABLE)
p->migration_flags &= ~MDF_PUSH;
task_rq_unlock(rq, p, rf);
 
-   stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
-   &pending->arg, &pending->stop_work);
+   if (!stop_pending) {
+   stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
+   &pending->arg, &pending->stop_work);
+   }
 
if (flags & SCA_MIGRATE_ENABLE)
return 0;


[tip: objtool/core] objtool,x86: Rewrite LEA decode

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 7ee93978f459ace4e0fe30af582d343d5fb6421a
Gitweb:
https://git.kernel.org/tip/7ee93978f459ace4e0fe30af582d343d5fb6421a
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 21:29:16 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:29 +01:00

objtool,x86: Rewrite LEA decode

Current LEA decoding is a bunch of special cases; properly decode the
instruction, with the exception of full SIB and RIP-relative modes.
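
For reference (an illustration, not from the commit message), the
stack-relevant forms the new decode handles look like:

	lea 0x10(%rsp), %rbp	/* src.reg = CFI_SP, src.offset = 0x10 */
	lea -0x8(%rbp), %rsp	/* src.reg = CFI_BP, src.offset = -0x8  */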

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.143250...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 86 ++--
 1 file changed, 28 insertions(+), 58 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 549813c..d8f0138 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -91,9 +91,10 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
 {
struct insn insn;
int x86_64, sign;
-   unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
- rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
- modrm_reg = 0, sib = 0;
+   unsigned char op1, op2,
+ rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
+ modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+ sib = 0;
struct stack_op *op = NULL;
struct symbol *sym;
 
@@ -328,68 +329,37 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
 
case 0x8d:
-   if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
-
-   ADD_OP(op) {
-   if (!insn.displacement.value) {
-   /* lea (%rsp), reg */
-   op->src.type = OP_SRC_REG;
-   } else {
-   /* lea disp(%rsp), reg */
-   op->src.type = OP_SRC_ADD;
-   op->src.offset = insn.displacement.value;
-   }
-   op->src.reg = CFI_SP;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
-   }
-
-   } else if (rex == 0x48 && modrm == 0x65) {
-
-   /* lea disp(%rbp), %rsp */
-   ADD_OP(op) {
-   op->src.type = OP_SRC_ADD;
-   op->src.reg = CFI_BP;
-   op->src.offset = insn.displacement.value;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = CFI_SP;
-   }
+   if (modrm_mod == 3) {
+   WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset);
+   break;
+   }
 
-   } else if (rex == 0x49 && modrm == 0x62 &&
-  insn.displacement.value == -8) {
+   /* skip non 64bit ops */
+   if (!rex_w)
+   break;
 
-   /*
-* lea -0x8(%r10), %rsp
-*
-* Restoring rsp back to its original value after a
-* stack realignment.
-*/
-   ADD_OP(op) {
-   op->src.type = OP_SRC_ADD;
-   op->src.reg = CFI_R10;
-   op->src.offset = -8;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = CFI_SP;
-   }
+   /* skip nontrivial SIB */
+   if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
+   break;
 
-   } else if (rex == 0x49 && modrm == 0x65 &&
-  insn.displacement.value == -16) {
+   /* skip RIP relative displacement */
+   if (modrm_rm == 5 && modrm_mod == 0)
+   break;
 
-   /*
-* lea -0x10(%r13), %rsp
-*
-* Restoring rsp back to its original value after a
-* stack realignment.
-*/
-   ADD_OP(op) {
+   /* lea disp(%src), %dst */
+   ADD_OP(op) {
+   op->src.offset = insn.displacement.value;
+   if (!op->src.offset) {
+   /* lea 

[tip: objtool/core] objtool,x86: More ModRM sugar

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 7e1b2eb05787d1c7f18445b7cfdfc612e827ca7b
Gitweb:
https://git.kernel.org/tip/7e1b2eb05787d1c7f18445b7cfdfc612e827ca7b
Author:Peter Zijlstra 
AuthorDate:Fri, 12 Feb 2021 09:13:00 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:31 +01:00

objtool,x86: More ModRM sugar

Better helpers to decode ModRM.

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/YCZB/ljatfxqq...@hirez.programming.kicks-ass.net
---
 tools/objtool/arch/x86/decode.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index b42e5ec..431bafb 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -82,15 +82,21 @@ unsigned long arch_jump_destination(struct instruction 
*insn)
  * 01  |   [r/m + d8]   |[S+d]|  [r/m + d8]   |
  * 10  |   [r/m + d32]  |[S+D]|  [r/m + d32]  |
  * 11  |                r/m                   |
- *
  */
+
+#define mod_is_mem()   (modrm_mod != 3)
+#define mod_is_reg()   (modrm_mod == 3)
+
 #define is_RIP()   ((modrm_rm & 7) == CFI_BP && modrm_mod == 0)
-#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3)
+#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem())
 
 #define rm_is(reg) (have_SIB() ? \
sib_base == (reg) && sib_index == CFI_SP : \
modrm_rm == (reg))
 
+#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg))
+#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg))
+
 int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
@@ -154,7 +160,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
 
case 0x1:
case 0x29:
-   if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) {
+   if (rex_w && rm_is_reg(CFI_SP)) {
 
/* add/sub reg, %rsp */
ADD_OP(op) {
@@ -219,7 +225,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
 
/* %rsp target only */
-   if (!(modrm_mod == 3 && modrm_rm == CFI_SP))
+   if (!rm_is_reg(CFI_SP))
break;
 
imm = insn.immediate.value;
@@ -272,7 +278,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
 
if (modrm_reg == CFI_SP) {
 
-   if (modrm_mod == 3) {
+   if (mod_is_reg()) {
/* mov %rsp, reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
@@ -308,7 +314,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
}
 
-   if (modrm_mod == 3 && modrm_rm == CFI_SP) {
+   if (rm_is_reg(CFI_SP)) {
 
/* mov reg, %rsp */
ADD_OP(op) {
@@ -325,7 +331,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
if (!rex_w)
break;
 
-   if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) {
+   if (rm_is_mem(CFI_BP)) {
 
/* mov reg, disp(%rbp) */
ADD_OP(op) {
@@ -338,7 +344,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
}
 
-   if (modrm_mod != 3 && rm_is(CFI_SP)) {
+   if (rm_is_mem(CFI_SP)) {
 
/* mov reg, disp(%rsp) */
ADD_OP(op) {
@@ -357,7 +363,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
if (!rex_w)
break;
 
-   if ((modrm_mod == 1 || modrm_mod == 2) && modrm_rm == CFI_BP) {
+   if (rm_is_mem(CFI_BP)) {
 
/* mov disp(%rbp), reg */
ADD_OP(op) {
@@ -370,7 +376,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
}
 
-   if (modrm_mod != 3 && rm_is(CFI_SP)) {
+   if (rm_is_mem(CFI_SP)) {
 
/* mov disp(%rsp), reg */
ADD_OP(op) {
@@ -386,7 +392,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
 
case 0x8d:
-   if (modrm_mod == 3) {
+   if (mod_is_reg()) {
WARN("invalid LEA 

[tip: objtool/core] objtool: Add --backup

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 327695eb9e3461b09d5cd5baef5df6526dd240c6
Gitweb:
https://git.kernel.org/tip/327695eb9e3461b09d5cd5baef5df6526dd240c6
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Feb 2021 10:59:59 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:31 +01:00

objtool: Add --backup

Teach objtool to write backup files, such that it becomes easier to
see what objtool did to the object file.

Backup files will be ${name}.orig.

Suggested-by: Borislav Petkov 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Borislav Petkov 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/yd4obt3aoxpwl...@hirez.programming.kicks-ass.net
---
 tools/objtool/builtin-check.c   |  4 +-
 tools/objtool/include/objtool/builtin.h |  3 +-
 tools/objtool/objtool.c | 64 -
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c3a85d8..97f063d 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -18,7 +18,8 @@
 #include 
 #include 
 
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+ validate_dup, vmlinux, mcount, noinstr, backup;
 
 static const char * const check_usage[] = {
"objtool check [] file.o",
@@ -37,6 +38,7 @@ const struct option check_options[] = {
OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
+   OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
OPT_END(),
 };
 
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 2502bb2..d019210 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -8,7 +8,8 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+    validate_dup, vmlinux, mcount, noinstr, backup;
 
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 7b97ce4..43c1836 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -44,6 +45,64 @@ bool help;
 const char *objname;
 static struct objtool_file file;
 
+static bool objtool_create_backup(const char *_objname)
+{
+   int len = strlen(_objname);
+   char *buf, *base, *name = malloc(len+6);
+   int s, d, l, t;
+
+   if (!name) {
+   perror("failed backup name malloc");
+   return false;
+   }
+
+   strcpy(name, _objname);
+   strcpy(name + len, ".orig");
+
+   d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+   if (d < 0) {
+   perror("failed to create backup file");
+   return false;
+   }
+
+   s = open(_objname, O_RDONLY);
+   if (s < 0) {
+   perror("failed to open orig file");
+   return false;
+   }
+
+   buf = malloc(4096);
+   if (!buf) {
+   perror("failed backup data malloc");
+   return false;
+   }
+
+   while ((l = read(s, buf, 4096)) > 0) {
+   base = buf;
+   do {
+   t = write(d, base, l);
+   if (t < 0) {
+   perror("failed backup write");
+   return false;
+   }
+   base += t;
+   l -= t;
+   } while (l);
+   }
+
+   if (l < 0) {
+   perror("failed backup read");
+   return false;
+   }
+
+   free(name);
+   free(buf);
+   close(d);
+   close(s);
+
+   return true;
+}
+
 struct objtool_file *objtool_open_read(const char *_objname)
 {
if (objname) {
@@ -59,6 +118,11 @@ struct objtool_file *objtool_open_read(const char *_objname)
if (!file.elf)
return NULL;
 
+   if (backup && !objtool_create_backup(objname)) {
+   WARN("can't create backup file");
+   return NULL;
+   }
+
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.static_call_list);


[tip: objtool/core] objtool,x86: Renumber CFI_reg

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 5e506daa2d148f735f90b2018ca6ef6e52144fad
Gitweb:
https://git.kernel.org/tip/5e506daa2d148f735f90b2018ca6ef6e52144fad
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 20:18:21 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:29 +01:00

objtool,x86: Renumber CFI_reg

Make them match the instruction encoding numbering.

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.033720...@infradead.org
---
 tools/objtool/arch/x86/include/arch/cfi_regs.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h
index 79bc517..0579d22 100644
--- a/tools/objtool/arch/x86/include/arch/cfi_regs.h
+++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h
@@ -4,13 +4,13 @@
 #define _OBJTOOL_CFI_REGS_H
 
 #define CFI_AX 0
-#define CFI_DX 1
-#define CFI_CX 2
+#define CFI_CX 1
+#define CFI_DX 2
 #define CFI_BX 3
-#define CFI_SI 4
-#define CFI_DI 5
-#define CFI_BP 6
-#define CFI_SP 7
+#define CFI_SP 4
+#define CFI_BP 5
+#define CFI_SI 6
+#define CFI_DI 7
 #define CFI_R8 8
 #define CFI_R9 9
 #define CFI_R1010


[tip: objtool/core] objtool,x86: Rewrite LEAVE

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: a91451516348221f2477205eca9e813830e01fa3
Gitweb:
https://git.kernel.org/tip/a91451516348221f2477205eca9e813830e01fa3
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 21:41:13 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:30 +01:00

objtool,x86: Rewrite LEAVE

Since we can now have multiple stack-ops per instruction, we don't
need to special case LEAVE and can simply emit the composite
operations.

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.253273...@infradead.org
---
 tools/objtool/arch/x86/decode.c  | 14 +++---
 tools/objtool/check.c| 24 ++--
 tools/objtool/include/objtool/arch.h |  1 -
 3 files changed, 13 insertions(+), 26 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index d8f0138..47b9acf 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -446,9 +446,17 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
 * mov bp, sp
 * pop bp
 */
-   ADD_OP(op)
-   op->dest.type = OP_DEST_LEAVE;
-
+   ADD_OP(op) {
+   op->src.type = OP_SRC_REG;
+   op->src.reg = CFI_BP;
+   op->dest.type = OP_DEST_REG;
+   op->dest.reg = CFI_SP;
+   }
+   ADD_OP(op) {
+   op->src.type = OP_SRC_POP;
+   op->dest.type = OP_DEST_REG;
+   op->dest.reg = CFI_BP;
+   }
break;
 
case 0xe3:
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 12b8f0f..a0f762a 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2020,7 +2020,7 @@ static int update_cfi_state(struct instruction *insn,
}
 
else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
-        cfa->base == CFI_BP) {
+        (cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) {
 
/*
 * mov %rbp, %rsp
@@ -2217,7 +2217,7 @@ static int update_cfi_state(struct instruction *insn,
cfa->offset = 0;
cfi->drap_offset = -1;
 
-   } else if (regs[op->dest.reg].offset == -cfi->stack_size) {
+   } else if (cfi->stack_size == -regs[op->dest.reg].offset) {
 
/* pop %reg */
restore_reg(cfi, op->dest.reg);
@@ -2358,26 +2358,6 @@ static int update_cfi_state(struct instruction *insn,
 
break;
 
-   case OP_DEST_LEAVE:
-   if ((!cfi->drap && cfa->base != CFI_BP) ||
-   (cfi->drap && cfa->base != cfi->drap_reg)) {
-   WARN_FUNC("leave instruction with modified stack frame",
- insn->sec, insn->offset);
-   return -1;
-   }
-
-   /* leave (mov %rbp, %rsp; pop %rbp) */
-
-   cfi->stack_size = -cfi->regs[CFI_BP].offset - 8;
-   restore_reg(cfi, CFI_BP);
-
-   if (!cfi->drap) {
-   cfa->base = CFI_SP;
-   cfa->offset -= 8;
-   }
-
-   break;
-
case OP_DEST_MEM:
if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) {
WARN_FUNC("unknown stack-related memory operation",
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 6ff0685..ff21f38 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -35,7 +35,6 @@ enum op_dest_type {
OP_DEST_MEM,
OP_DEST_PUSH,
OP_DEST_PUSHF,
-   OP_DEST_LEAVE,
 };
 
 struct op_dest {


[tip: objtool/core] objtool,x86: Simplify register decode

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 9d5a2c2caa10fc135d7020e76baa6a17c52e608f
Gitweb:
https://git.kernel.org/tip/9d5a2c2caa10fc135d7020e76baa6a17c52e608f
Author:Peter Zijlstra 
AuthorDate:Tue, 09 Feb 2021 19:59:43 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:30 +01:00

objtool,x86: Simplify register decode

Since the CFI_reg number now matches the instruction encoding order, do
away with the op_to_cfi_reg[] array and use direct assignment.

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.362004...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 79 +++-
 1 file changed, 39 insertions(+), 40 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 47b9acf..5ce7dc4 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -17,17 +17,6 @@
 #include 
 #include 
 
-static unsigned char op_to_cfi_reg[][2] = {
-   {CFI_AX, CFI_R8},
-   {CFI_CX, CFI_R9},
-   {CFI_DX, CFI_R10},
-   {CFI_BX, CFI_R11},
-   {CFI_SP, CFI_R12},
-   {CFI_BP, CFI_R13},
-   {CFI_SI, CFI_R14},
-   {CFI_DI, CFI_R15},
-};
-
 static int is_x86_64(const struct elf *elf)
 {
switch (elf->ehdr.e_machine) {
@@ -94,7 +83,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
unsigned char op1, op2,
  rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
  modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
- sib = 0;
+ sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */;
struct stack_op *op = NULL;
struct symbol *sym;
 
@@ -130,23 +119,29 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
if (insn.modrm.nbytes) {
modrm = insn.modrm.bytes[0];
modrm_mod = X86_MODRM_MOD(modrm);
-   modrm_reg = X86_MODRM_REG(modrm);
-   modrm_rm = X86_MODRM_RM(modrm);
+   modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
+   modrm_rm  = X86_MODRM_RM(modrm)  + 8*rex_b;
}
 
-   if (insn.sib.nbytes)
+   if (insn.sib.nbytes) {
sib = insn.sib.bytes[0];
+   /*
+   sib_scale = X86_SIB_SCALE(sib);
+   sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
+   sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
+*/
+   }
 
switch (op1) {
 
case 0x1:
case 0x29:
-   if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+   if (rex_w && modrm_mod == 3 && modrm_rm == CFI_SP) {
 
/* add/sub reg, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_ADD;
-   op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+   op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG;
op->dest.reg = CFI_SP;
}
@@ -158,7 +153,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
/* push reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
-   op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+   op->src.reg = (op1 & 0x7) + 8*rex_b;
op->dest.type = OP_DEST_PUSH;
}
 
@@ -170,7 +165,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
ADD_OP(op) {
op->src.type = OP_SRC_POP;
op->dest.type = OP_DEST_REG;
-   op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+   op->dest.reg = (op1 & 0x7) + 8*rex_b;
}
 
break;
@@ -223,7 +218,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
 
case 0x89:
-   if (rex_w && !rex_r && modrm_reg == 4) {
+   if (rex_w && modrm_reg == CFI_SP) {
 
if (modrm_mod == 3) {
/* mov %rsp, reg */
@@ -231,17 +226,17 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG;
-   op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+   op->dest.reg = modrm_rm;
}
break;
 

[tip: objtool/core] objtool,x86: Rewrite ADD/SUB/AND

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: e1bba6c8930b56c4afe88aa875f3d20d1cef4fe1
Gitweb:
https://git.kernel.org/tip/e1bba6c8930b56c4afe88aa875f3d20d1cef4fe1
Author:Peter Zijlstra 
AuthorDate:Wed, 10 Feb 2021 14:11:30 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:31 +01:00

objtool,x86: Rewrite ADD/SUB/AND

Support sign extending and imm8 forms.

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.588366...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 70 +++-
 1 file changed, 51 insertions(+), 19 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 78ae5be..b42e5ec 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -98,13 +98,14 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
struct list_head *ops_list)
 {
struct insn insn;
-   int x86_64, sign;
+   int x86_64;
unsigned char op1, op2,
  rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
  modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
  sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
struct stack_op *op = NULL;
struct symbol *sym;
+   u64 imm;
 
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
@@ -200,12 +201,54 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
*type = INSN_JUMP_CONDITIONAL;
break;
 
-   case 0x81:
-   case 0x83:
-   if (rex != 0x48)
+   case 0x80 ... 0x83:
+   /*
+* 1000 00sw : mod OP r/m : immediate
+*
+* s - sign extend immediate
+* w - imm8 / imm32
+*
+*  OP: 000 ADD    100 AND
+*      001 OR     101 SUB
+*      010 ADC    110 XOR
+*      011 SBB    111 CMP
+*/
+
+   /* 64bit only */
+   if (!rex_w)
break;
 
-   if (modrm == 0xe4) {
+   /* %rsp target only */
+   if (!(modrm_mod == 3 && modrm_rm == CFI_SP))
+   break;
+
+   imm = insn.immediate.value;
+   if (op1 & 2) { /* sign extend */
+   if (op1 & 1) { /* imm32 */
+   imm <<= 32;
+   imm = (s64)imm >> 32;
+   } else { /* imm8 */
+   imm <<= 56;
+   imm = (s64)imm >> 56;
+   }
+   }
+
+   switch (modrm_reg & 7) {
+   case 5:
+   imm = -imm;
+   /* fallthrough */
+   case 0:
+   /* add/sub imm, %rsp */
+   ADD_OP(op) {
+   op->src.type = OP_SRC_ADD;
+   op->src.reg = CFI_SP;
+   op->src.offset = imm;
+   op->dest.type = OP_DEST_REG;
+   op->dest.reg = CFI_SP;
+   }
+   break;
+
+   case 4:
/* and imm, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_AND;
@@ -215,23 +258,12 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
op->dest.reg = CFI_SP;
}
break;
-   }
 
-   if (modrm == 0xc4)
-   sign = 1;
-   else if (modrm == 0xec)
-   sign = -1;
-   else
+   default:
+   /* WARN ? */
break;
-
-   /* add/sub imm, %rsp */
-   ADD_OP(op) {
-   op->src.type = OP_SRC_ADD;
-   op->src.reg = CFI_SP;
-   op->src.offset = insn.immediate.value * sign;
-   op->dest.type = OP_DEST_REG;
-   op->dest.reg = CFI_SP;
}
+
break;
 
case 0x89:
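
(A standalone sketch, not part of the patch: the imm8 sign-extension trick
above can be checked in isolation; shifting the byte to the top of the word
and arithmetically shifting it back replicates the sign bit.)

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t imm = 0xf0;              /* raw imm8 byte, i.e. -16 */
		imm <<= 56;
		imm = (int64_t)imm >> 56;         /* arithmetic shift: sign extend */
		printf("%lld\n", (long long)imm); /* prints -16 */
		return 0;
	}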


[tip: objtool/core] objtool: Collate parse_options() users

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 13d52bf07c55645f9e3c430748708253d724e705
Gitweb:
https://git.kernel.org/tip/13d52bf07c55645f9e3c430748708253d724e705
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Feb 2021 11:18:24 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:32 +01:00

objtool: Collate parse_options() users

Ensure there's a single place that parses check_options, in
preparation for extending where to get options from.

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/20210226110004.193108...@infradead.org
---
 tools/objtool/builtin-check.c   | 14 +-
 tools/objtool/builtin-orc.c |  5 +
 tools/objtool/include/objtool/builtin.h |  2 ++
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 97f063d..0399752 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -42,17 +42,21 @@ const struct option check_options[] = {
OPT_END(),
 };
 
+int cmd_parse_options(int argc, const char **argv, const char * const usage[])
+{
+   argc = parse_options(argc, argv, check_options, usage, 0);
+   if (argc != 1)
+   usage_with_options(usage, check_options);
+   return argc;
+}
+
 int cmd_check(int argc, const char **argv)
 {
const char *objname;
struct objtool_file *file;
int ret;
 
-   argc = parse_options(argc, argv, check_options, check_usage, 0);
-
-   if (argc != 1)
-   usage_with_options(check_usage, check_options);
-
+   argc = cmd_parse_options(argc, argv, check_usage);
objname = argv[0];
 
file = objtool_open_read(objname);
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 8273bbf..17f8b93 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv)
struct objtool_file *file;
int ret;
 
-   argc = parse_options(argc, argv, check_options, orc_usage, 0);
-   if (argc != 1)
-   usage_with_options(orc_usage, check_options);
-
+   argc = cmd_parse_options(argc, argv, orc_usage);
objname = argv[0];
 
file = objtool_open_read(objname);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index d019210..15ac0b7 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -11,6 +11,8 @@ extern const struct option check_options[];
 extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
 validate_dup, vmlinux, mcount, noinstr, backup;
 
+extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
+
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
 


[tip: objtool/core] objtool,x86: Support %riz encodings

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 0a8bef63e5bf4496251f7bac4ddadb5f5f489932
Gitweb:
https://git.kernel.org/tip/0a8bef63e5bf4496251f7bac4ddadb5f5f489932
Author:Peter Zijlstra 
AuthorDate:Wed, 10 Feb 2021 11:47:35 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:30 +01:00

objtool,x86: Support %riz encodings

When there's a SIB byte, the register otherwise denoted by r/m will
then be denoted by SIB.base, and REX.b will now extend this. SIB.index == SP
is magic and denotes an index value of zero.

This means that there's a bunch of alternative (longer) encodings for
the same thing. E.g. 'ModRM.mod != 3, ModRM.r/m = AX' can be encoded as
'ModRM.mod != 3, ModRM.r/m = SP, SIB.base = AX, SIB.index = SP', which is
actually 4 different encodings because the value of SIB.scale is irrelevant,
giving rise to 5 different but equal encodings.

Support these encodings and clean up the SIB handling in general.

Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173627.472967...@infradead.org
---
 tools/objtool/arch/x86/decode.c | 67 ++--
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 5ce7dc4..78ae5be 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -72,6 +72,25 @@ unsigned long arch_jump_destination(struct instruction *insn)
return -1; \
else for (list_add_tail(&op->list, ops_list); op; op = NULL)
 
+/*
+ * Helpers to decode ModRM/SIB:
+ *
+ * r/m | AX  CX  DX  BX |  SP |  BP |  SI  DI |
+ *     | R8  R9 R10 R11 | R12 | R13 | R14 R15 |
+ * Mod +----------------+-----+-----+---------+
+ * 00  |     [r/m]      |[SIB]|[IP+]|  [r/m]  |
+ * 01  |   [r/m + d8]   |[S+d]|  [r/m + d8]   |
+ * 10  |   [r/m + d32]  |[S+D]|  [r/m + d32]  |
+ * 11  |                r/m                   |
+ *
+ */
+#define is_RIP()   ((modrm_rm & 7) == CFI_BP && modrm_mod == 0)
+#define have_SIB() ((modrm_rm & 7) == CFI_SP && modrm_mod != 3)
+
+#define rm_is(reg) (have_SIB() ? \
+   sib_base == (reg) && sib_index == CFI_SP : \
+   modrm_rm == (reg))
+
 int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
@@ -83,7 +102,7 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
unsigned char op1, op2,
  rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
  modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
- sib = 0 /* , sib_scale = 0, sib_index = 0, sib_base = 0 */;
+ sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
struct stack_op *op = NULL;
struct symbol *sym;
 
@@ -125,11 +144,9 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
 
if (insn.sib.nbytes) {
sib = insn.sib.bytes[0];
-   /*
-   sib_scale = X86_SIB_SCALE(sib);
+   /* sib_scale = X86_SIB_SCALE(sib); */
sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
sib_base  = X86_SIB_BASE(sib)  + 8*rex_b;
-*/
}
 
switch (op1) {
@@ -218,7 +235,10 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
 
case 0x89:
-   if (rex_w && modrm_reg == CFI_SP) {
+   if (!rex_w)
+   break;
+
+   if (modrm_reg == CFI_SP) {
 
if (modrm_mod == 3) {
/* mov %rsp, reg */
@@ -231,14 +251,17 @@ int arch_decode_instruction(const struct elf *elf, const 
struct section *sec,
break;
 
} else {
-   /* skip nontrivial SIB */
-   if ((modrm_rm & 7) == 4 && !(sib == 0x24 && rex_b == rex_x))
-   break;
-
/* skip RIP relative displacement */
-   if ((modrm_rm & 7) == 5 && modrm_mod == 0)
+   if (is_RIP())
break;
 
+   /* skip nontrivial SIB */
+   if (have_SIB()) {
+   modrm_rm = sib_base;
+   if (sib_index != CFI_SP)
+   break;
+   }
+
/* mov %rsp, disp(%reg) */
ADD_OP(op) {

[tip: objtool/core] objtool: Parse options from OBJTOOL_ARGS

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: b52eb21aeca75790869c26b91b1d7b80b3946430
Gitweb:
https://git.kernel.org/tip/b52eb21aeca75790869c26b91b1d7b80b3946430
Author:Peter Zijlstra 
AuthorDate:Fri, 26 Feb 2021 11:32:30 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:32 +01:00

objtool: Parse options from OBJTOOL_ARGS

Teach objtool to parse options from the OBJTOOL_ARGS environment
variable.

This enables things like:

  $ OBJTOOL_ARGS="--backup" make O=defconfig-build/ kernel/ponies.o

to obtain both defconfig-build/kernel/ponies.o{,.orig} and easily
inspect what objtool actually did.
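
Since the variable is split on spaces, multiple options compose naturally,
for instance (an illustration; --stats is objtool's existing statistics
switch):

  $ OBJTOOL_ARGS="--backup --stats" make O=defconfig-build/ kernel/ponies.o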

Suggested-by: Borislav Petkov 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/20210226110004.252553...@infradead.org
---
 tools/objtool/builtin-check.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 0399752..8b38b5d 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -15,6 +15,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -26,6 +27,11 @@ static const char * const check_usage[] = {
NULL,
 };
 
+static const char * const env_usage[] = {
+   "OBJTOOL_ARGS=\"<options>\"",
+   NULL,
+};
+
 const struct option check_options[] = {
OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
@@ -44,6 +50,25 @@ const struct option check_options[] = {
 
 int cmd_parse_options(int argc, const char **argv, const char * const usage[])
 {
+   const char *envv[16] = { };
+   char *env;
+   int envc;
+
+   env = getenv("OBJTOOL_ARGS");
+   if (env) {
+   envv[0] = "OBJTOOL_ARGS";
+   for (envc = 1; envc < ARRAY_SIZE(envv); ) {
+   envv[envc++] = env;
+   env = strchr(env, ' ');
+   if (!env)
+   break;
+   *env = '\0';
+   env++;
+   }
+
+   parse_options(envc, envv, check_options, env_usage, 0);
+   }
+
argc = parse_options(argc, argv, check_options, usage, 0);
if (argc != 1)
usage_with_options(usage, check_options);


[tip: objtool/core] objtool: Allow UNWIND_HINT to suppress dodgy stack modifications

2021-03-03 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 8c0cca513be9e3dd9c17b55b72b66751f3487577
Gitweb:
https://git.kernel.org/tip/8c0cca513be9e3dd9c17b55b72b66751f3487577
Author:Peter Zijlstra 
AuthorDate:Thu, 11 Feb 2021 13:03:28 +01:00
Committer: Peter Zijlstra 
CommitterDate: Wed, 03 Mar 2021 09:38:29 +01:00

objtool: Allow UNWIND_HINT to suppress dodgy stack modifications

rewind_stack_do_exit()
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */

xorl %ebp, %ebp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_REGS

calldo_exit

Does unspeakable things to the stack, which objtool currently fails to
detect due to a limitation in instruction decoding. This will be
rectified after which the above will result in:

arch/x86/entry/entry_64.o: warning: objtool: .text+0xab: unsupported stack register modification

Allow the UNWIND_HINT on the next instruction to suppress this, it
will overwrite the state anyway.

Suggested-by: Josh Poimboeuf 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Tested-by: Nick Desaulniers 
Link: https://lkml.kernel.org/r/20210211173626.918498...@infradead.org
---
 tools/objtool/check.c | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 068cdb4..12b8f0f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1959,8 +1959,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg)
  *   41 5d pop%r13
  *   c3retq
  */
-static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
-struct stack_op *op)
+static int update_cfi_state(struct instruction *insn,
+   struct instruction *next_insn,
+   struct cfi_state *cfi, struct stack_op *op)
 {
struct cfi_reg *cfa = &cfi->cfa;
struct cfi_reg *regs = cfi->regs;
@@ -2161,7 +2162,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
}
 
-   if (op->dest.reg == cfi->cfa.base) {
+   if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) {
WARN_FUNC("unsupported stack register modification",
  insn->sec, insn->offset);
return -1;
@@ -2433,13 +2434,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
return 0;
 }
 
-static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+static int handle_insn_ops(struct instruction *insn,
+  struct instruction *next_insn,
+  struct insn_state *state)
 {
struct stack_op *op;
 
list_for_each_entry(op, &insn->stack_ops, list) {
 
-   if (update_cfi_state(insn, &state->cfi, op))
+   if (update_cfi_state(insn, next_insn, &state->cfi, op))
return 1;
 
if (op->dest.type == OP_DEST_PUSHF) {
@@ -2719,7 +2722,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
}
 
-   if (handle_insn_ops(insn, &state))
+   if (handle_insn_ops(insn, next_insn, &state))
return 1;
 
switch (insn->type) {


[tip: sched/urgent] sched: Simplify migration_cpu_stop()

2021-03-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: 6430eb536a97036b1d529cbf383cfe36e41a2f97
Gitweb:
https://git.kernel.org/tip/6430eb536a97036b1d529cbf383cfe36e41a2f97
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:50:39 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 01 Mar 2021 11:02:13 +01:00

sched: Simplify migration_cpu_stop()

When affine_move_task() issues a migration_cpu_stop(), the purpose of
that function is to complete that @pending, not any random other
p->migration_pending that might have gotten installed since.

This realization much simplifies migration_cpu_stop() and allows
further necessary steps to fix all this as it provides the guarantee
that @pending's stopper will complete @pending (and not some random
other @pending).

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.430014...@infradead.org
---
 kernel/sched/core.c | 56 ++--
 1 file changed, 8 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 79ddba5..088e8f4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1898,8 +1898,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
  */
 static int migration_cpu_stop(void *data)
 {
-   struct set_affinity_pending *pending;
struct migration_arg *arg = data;
+   struct set_affinity_pending *pending = arg->pending;
struct task_struct *p = arg->task;
int dest_cpu = arg->dest_cpu;
struct rq *rq = this_rq();
@@ -1921,25 +1921,6 @@ static int migration_cpu_stop(void *data)
raw_spin_lock(&p->pi_lock);
rq_lock(rq, &rf);
 
-   pending = p->migration_pending;
-   if (pending && !arg->pending) {
-   /*
-* This happens from sched_exec() and migrate_task_to(),
-* neither of them care about pending and just want a task to
-* maybe move about.
-*
-* Even if there is a pending, we can ignore it, since
-* affine_move_task() will have it's own stop_work's in flight
-* which will manage the completion.
-*
-* Notably, pending doesn't need to match arg->pending. This can
-* happen when tripple concurrent affine_move_task() first sets
-* pending, then clears pending and eventually sets another
-* pending.
-*/
-   pending = NULL;
-   }
-
/*
 * If task_rq(p) != rq, it cannot be migrated here, because we're
 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1950,31 +1931,20 @@ static int migration_cpu_stop(void *data)
goto out;
 
if (pending) {
-   p->migration_pending = NULL;
+   if (p->migration_pending == pending)
+   p->migration_pending = NULL;
complete = true;
}
 
-   /* migrate_enable() --  we must not race against SCA */
-   if (dest_cpu < 0) {
-   /*
-* When this was migrate_enable() but we no longer
-* have a @pending, a concurrent SCA 'fixed' things
-* and we should be valid again. Nothing to do.
-*/
-   if (!pending) {
-   WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
-   goto out;
-   }
-
+   if (dest_cpu < 0)
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
-   }
 
if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, dest_cpu);
else
p->wake_cpu = dest_cpu;
 
-   } else if (dest_cpu < 0 || pending) {
+   } else if (pending) {
/*
 * This happens when we get migrated between migrate_enable()'s
 * preempt_enable() and scheduling the stopper task. At that
@@ -1989,23 +1959,14 @@ static int migration_cpu_stop(void *data)
 * ->pi_lock, so the allowed mask is stable - if it got
 * somewhere allowed, we're done.
 */
-   if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
-   p->migration_pending = NULL;
+   if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+   if (p->migration_pending == pending)
+   p->migration_pending = NULL;
complete = true;
goto out;
  

[tip: sched/urgent] sched: Fix migration_cpu_stop() requeueing

2021-03-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: b8e45e2a14bab684713f5dfc70c9e578c333dcdd
Gitweb:
https://git.kernel.org/tip/b8e45e2a14bab684713f5dfc70c9e578c333dcdd
Author:Peter Zijlstra 
AuthorDate:Sat, 13 Feb 2021 13:10:35 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 01 Mar 2021 11:02:13 +01:00

sched: Fix migration_cpu_stop() requeueing

When affine_move_task(p) is called on a running task @p, which is not
otherwise already changing affinity, we'll first set
p->migration_pending and then do:

 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);

This then gets us to migration_cpu_stop() running on the CPU that was
previously running our victim task @p.

If we find that our task is no longer on that runqueue (this can
happen because of a concurrent migration due to load-balance etc.),
then we'll end up at the:

} else if (dest_cpu < 0 || pending) {

branch. Which we'll take because we set pending earlier. Here we first
check if the task @p has already satisfied the affinity constraints,
if so we bail early [A]. Otherwise we'll reissue migration_cpu_stop()
onto the CPU that is now hosting our task @p:

stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
                    &pending->arg, &pending->stop_work);

Except, we've never initialized pending->arg, which will be all 0s.

This then results in running migration_cpu_stop() on the next CPU with
arg->p == NULL, which gives the by now obvious result of fireworks.

The cure is to change affine_move_task() to always use pending->arg,
furthermore we can use the exact same pattern as the
SCA_MIGRATE_ENABLE case, since we'll block on the pending->done
completion anyway, no point in adding yet another completion in
stop_one_cpu().

This then gives a clear distinction between the two
migration_cpu_stop() use cases:

  - sched_exec() / migrate_task_to() : arg->pending == NULL
  - affine_move_task() : arg->pending != NULL;

And we can have it ignore p->migration_pending when !arg->pending. Any
stop work from sched_exec() / migrate_task_to() is in addition to stop
works from affine_move_task(), which will be sufficient to issue the
completion.
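
Sketched as code (a paraphrase of the convention using the field names
from the patch below, not a verbatim quote of kernel/sched/core.c):

	struct migration_arg {
		struct task_struct		*task;
		int				dest_cpu;
		/*
		 * NULL: stop work from sched_exec()/migrate_task_to();
		 *       just move the task if possible.
		 * set:  stop work from affine_move_task(); must complete
		 *       this specific @pending.
		 */
		struct set_affinity_pending	*pending;
	};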

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.357743...@infradead.org
---
 kernel/sched/core.c | 39 ---
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ca2bb62..79ddba5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1922,6 +1922,24 @@ static int migration_cpu_stop(void *data)
rq_lock(rq, );
 
pending = p->migration_pending;
+   if (pending && !arg->pending) {
+   /*
+* This happens from sched_exec() and migrate_task_to(),
+* neither of them care about pending and just want a task to
+* maybe move about.
+*
+* Even if there is a pending, we can ignore it, since
+* affine_move_task() will have it's own stop_work's in flight
+* which will manage the completion.
+*
+* Notably, pending doesn't need to match arg->pending. This can
+* happen when tripple concurrent affine_move_task() first sets
+* pending, then clears pending and eventually sets another
+* pending.
+*/
+   pending = NULL;
+   }
+
/*
 * If task_rq(p) != rq, it cannot be migrated here, because we're
 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -2194,10 +2212,6 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
int dest_cpu, unsigned int flags)
 {
struct set_affinity_pending my_pending = { }, *pending = NULL;
-   struct migration_arg arg = {
-   .task = p,
-   .dest_cpu = dest_cpu,
-   };
bool complete = false;
 
/* Can the task run on the task's current CPU? If so, we're done */
@@ -2235,6 +2249,12 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
/* Install the request */
refcount_set(&my_pending.refs, 1);
init_completion(&my_pending.done);
+   my_pending.arg = (struct migration_arg) {
+   .task = p,
+   .dest_cpu = -1, /* any */
.pending = &my_pending,
+   };
+
p->migration_pending = &my_pending;
} else {
pending = p->migration_pending;
@@ -2265,12 +2285,6 @@ static int 

[tip: sched/urgent] sched: Collate affine_move_task() stoppers

2021-03-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: dbf983c0a5c37da2d476564792bd84e0e8f067fc
Gitweb:
https://git.kernel.org/tip/dbf983c0a5c37da2d476564792bd84e0e8f067fc
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:15:23 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 01 Mar 2021 11:02:14 +01:00

sched: Collate affine_move_task() stoppers

The SCA_MIGRATE_ENABLE and task_running() cases are almost identical,
collapse them to avoid further duplication.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.500108...@infradead.org
---
 kernel/sched/core.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 088e8f4..84b657f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2239,30 +2239,23 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
return -EINVAL;
}
 
-   if (flags & SCA_MIGRATE_ENABLE) {
-
-   refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
-   p->migration_flags &= ~MDF_PUSH;
-   task_rq_unlock(rq, p, rf);
-
-   stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
-   &pending->arg, &pending->stop_work);
-
-   return 0;
-   }
-
if (task_running(rq, p) || p->state == TASK_WAKING) {
/*
-* Lessen races (and headaches) by delegating
-* is_migration_disabled(p) checks to the stopper, which will
-* run on the same CPU as said p.
+* MIGRATE_ENABLE gets here because 'p == current', but for
+* anything else we cannot do is_migration_disabled(), punt
+* and have the stopper function handle it all race-free.
 */
+
refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
+   if (flags & SCA_MIGRATE_ENABLE)
+   p->migration_flags &= ~MDF_PUSH;
task_rq_unlock(rq, p, rf);
 
stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
&pending->arg, &pending->stop_work);
 
+   if (flags & SCA_MIGRATE_ENABLE)
+   return 0;
} else {
 
if (!is_migration_disabled(p)) {


[tip: sched/urgent] sched: Optimize migration_cpu_stop()

2021-03-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: 9eca0f53b1c2f5acb85e84673e263bf996817a24
Gitweb:
https://git.kernel.org/tip/9eca0f53b1c2f5acb85e84673e263bf996817a24
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:21:35 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 01 Mar 2021 11:02:14 +01:00

sched: Optimize migration_cpu_stop()

When the purpose of migration_cpu_stop() is to migrate the task to
'any' valid CPU, don't migrate the task when it's already running on a
valid CPU.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.569238...@infradead.org
---
 kernel/sched/core.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84b657f..ac05afb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1936,14 +1936,25 @@ static int migration_cpu_stop(void *data)
complete = true;
}
 
-   if (dest_cpu < 0)
+   if (dest_cpu < 0) {
+   if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
+   goto out;
+
dest_cpu = cpumask_any_distribute(&p->cpus_mask);
+   }
 
if (task_on_rq_queued(p))
rq = __migrate_task(rq, &rf, p, dest_cpu);
else
p->wake_cpu = dest_cpu;
 
+   /*
+* XXX __migrate_task() can fail, at which point we might end
+* up running on a dodgy CPU, AFAICT this can only happen
+* during CPU hotplug, at which point we'll get pushed out
+* anyway, so it's probably not a big deal.
+*/
+
} else if (pending) {
/*
 * This happens when we get migrated between migrate_enable()'s


[tip: sched/urgent] sched: Fix affine_move_task() self-concurrency

2021-03-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: de8115ef5c83ef2c9941684019d59f4c2e5d16ce
Gitweb:
https://git.kernel.org/tip/de8115ef5c83ef2c9941684019d59f4c2e5d16ce
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:31:09 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 01 Mar 2021 11:02:14 +01:00

sched: Fix affine_move_task() self-concurrency

Consider:

   sched_setaffinity(p, X); sched_setaffinity(p, Y);

Then the first will install p->migration_pending = &my_pending; and
issue stop_one_cpu_nowait(pending); and the second one will read
p->migration_pending and _also_ issue: stop_one_cpu_nowait(pending),
the _SAME_ @pending.

This causes stopper list corruption.

Add set_affinity_pending::stop_pending, to indicate if a stopper is in
progress.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.649146...@infradead.org
---
 kernel/sched/core.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ac05afb..4e4d100 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1864,6 +1864,7 @@ struct migration_arg {
 
 struct set_affinity_pending {
refcount_t  refs;
+   unsigned int            stop_pending;
struct completion   done;
struct cpu_stop_work    stop_work;
struct migration_arg    arg;
@@ -1982,12 +1983,15 @@ static int migration_cpu_stop(void *data)
 * determine is_migration_disabled() and so have to chase after
 * it.
 */
+   WARN_ON_ONCE(!pending->stop_pending);
task_rq_unlock(rq, p, &rf);
stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
&pending->arg, &pending->stop_work);
return 0;
}
 out:
+   if (pending)
+   pending->stop_pending = false;
task_rq_unlock(rq, p, &rf);
 
if (complete)
@@ -2183,7 +2187,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
int dest_cpu, unsigned int flags)
 {
struct set_affinity_pending my_pending = { }, *pending = NULL;
-   bool complete = false;
+   bool stop_pending, complete = false;
 
/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
@@ -2256,14 +2260,19 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
 * anything else we cannot do is_migration_disabled(), punt
 * and have the stopper function handle it all race-free.
 */
+   stop_pending = pending->stop_pending;
+   if (!stop_pending)
+   pending->stop_pending = true;
 
refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
if (flags & SCA_MIGRATE_ENABLE)
p->migration_flags &= ~MDF_PUSH;
task_rq_unlock(rq, p, rf);
 
-   stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
-   &pending->arg, &pending->stop_work);
+   if (!stop_pending) {
+   stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
+   &pending->arg, &pending->stop_work);
+   }
 
if (flags & SCA_MIGRATE_ENABLE)
return 0;


[tip: sched/urgent] sched: Simplify set_affinity_pending refcounts

2021-03-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: a4c2579076dc6951709a8e425df8369ab6eb2f24
Gitweb:
https://git.kernel.org/tip/a4c2579076dc6951709a8e425df8369ab6eb2f24
Author:Peter Zijlstra 
AuthorDate:Wed, 24 Feb 2021 11:42:08 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 01 Mar 2021 11:02:15 +01:00

sched: Simplify set_affinity_pending refcounts

Now that we have set_affinity_pending::stop_pending to indicate if a
stopper is in progress, and we have the guarantee that if that stopper
exists, it will (eventually) complete our @pending we can simplify the
refcount scheme by no longer counting the stopper thread.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Cc: sta...@kernel.org
Signed-off-by: Peter Zijlstra (Intel) 
Reviewed-by: Valentin Schneider 
Link: https://lkml.kernel.org/r/20210224131355.724130...@infradead.org
---
 kernel/sched/core.c | 32 
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4e4d100..9819121 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1862,6 +1862,10 @@ struct migration_arg {
struct set_affinity_pending *pending;
 };
 
+/*
+ * @refs: number of wait_for_completion()
+ * @stop_pending: is @stop_work in use
+ */
 struct set_affinity_pending {
refcount_t  refs;
unsigned int            stop_pending;
@@ -1997,10 +2001,6 @@ out:
if (complete)
complete_all(&pending->done);
 
-   /* For pending->{arg,stop_work} */
-   if (pending && refcount_dec_and_test(&pending->refs))
-   wake_up_var(&pending->refs);
-
return 0;
 }
 
@@ -2199,12 +2199,16 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
push_task = get_task_struct(p);
}
 
+   /*
+* If there are pending waiters, but no pending stop_work,
+* then complete now.
+*/
pending = p->migration_pending;
-   if (pending) {
-   refcount_inc(&pending->refs);
+   if (pending && !pending->stop_pending) {
p->migration_pending = NULL;
complete = true;
}
+
task_rq_unlock(rq, p, rf);
 
if (push_task) {
@@ -2213,7 +2217,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
}
 
if (complete)
-   goto do_complete;
+   complete_all(&pending->done);
 
return 0;
}
@@ -2264,9 +2268,9 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
if (!stop_pending)
pending->stop_pending = true;
 
-   refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
if (flags & SCA_MIGRATE_ENABLE)
p->migration_flags &= ~MDF_PUSH;
+
task_rq_unlock(rq, p, rf);
 
if (!stop_pending) {
@@ -2282,12 +2286,13 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
if (task_on_rq_queued(p))
rq = move_queued_task(rq, rf, p, dest_cpu);
 
-   p->migration_pending = NULL;
-   complete = true;
+   if (!pending->stop_pending) {
+   p->migration_pending = NULL;
+   complete = true;
+   }
}
task_rq_unlock(rq, p, rf);
 
-do_complete:
if (complete)
complete_all(&pending->done);
}
@@ -2295,7 +2300,7 @@ do_complete:
wait_for_completion(&pending->done);
 
if (refcount_dec_and_test(&pending->refs))
-   wake_up_var(&pending->refs);
+   wake_up_var(&pending->refs); /* No UaF, just an address */
 
/*
* Block the original owner of &pending until all subsequent callers
@@ -2303,6 +2308,9 @@ do_complete:
 */
wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
 
+   /* ARGH */
+   WARN_ON_ONCE(my_pending.stop_pending);
+
return 0;
 }
 


[tip: locking/urgent] static_call: Fix the module key fixup

2021-03-01 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the locking/urgent branch of tip:

Commit-ID: 8b97c027dfe4ba195be08fd0e18f716005763b8a
Gitweb:
https://git.kernel.org/tip/8b97c027dfe4ba195be08fd0e18f716005763b8a
Author:Peter Zijlstra 
AuthorDate:Thu, 25 Feb 2021 23:03:51 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 01 Mar 2021 11:02:10 +01:00

static_call: Fix the module key fixup

Provided the target address of a R_X86_64_PC32 relocation is aligned,
the low two bits should be invariant between the relative and absolute
value.

Turns out the address is not aligned and things go sideways, ensure we
transfer the bits in the absolute form when fixing up the key address.

Fixes: 73f44fe19d35 ("static_call: Allow module use without exposing static_call_key")
Reported-by: Steven Rostedt 
Signed-off-by: Peter Zijlstra (Intel) 
Tested-by: Steven Rostedt (VMware) 
Link: https://lkml.kernel.org/r/20210225220351.ge4...@worktop.programming.kicks-ass.net
---
 kernel/static_call.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/static_call.c b/kernel/static_call.c
index 6906c6e..ae82529 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -349,7 +349,8 @@ static int static_call_add_module(struct module *mod)
struct static_call_site *site;
 
for (site = start; site != stop; site++) {
-   unsigned long addr = (unsigned long)static_call_key(site);
+   unsigned long s_key = (long)site->key + (long)&site->key;
+   unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
unsigned long key;
 
/*
@@ -373,8 +374,8 @@ static int static_call_add_module(struct module *mod)
return -EINVAL;
}
 
-   site->key = (key - (long)&site->key) |
-   (site->key & STATIC_CALL_SITE_FLAGS);
+   key |= s_key & STATIC_CALL_SITE_FLAGS;
+   site->key = key - (long)&site->key;
}
 
return __static_call_init(mod, start, stop);


[tip: x86/entry] objtool: Fix stack-swizzle for FRAME_POINTER=y

2021-02-22 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the x86/entry branch of tip:

Commit-ID: 724c8a23d589d8a002d2e39633c2f9a5a429616f
Gitweb:
https://git.kernel.org/tip/724c8a23d589d8a002d2e39633c2f9a5a429616f
Author:Peter Zijlstra 
AuthorDate:Thu, 18 Feb 2021 17:14:10 +01:00
Committer: Thomas Gleixner 
CommitterDate: Mon, 22 Feb 2021 19:54:09 +01:00

objtool: Fix stack-swizzle for FRAME_POINTER=y

When objtool encounters the stack-swizzle:

mov %rsp, (%[tos])
mov %[tos], %rsp
...
pop %rsp

Inside a FRAME_POINTER=y build, things go a little screwy because
clearly we're not adjusting the cfa->base. This then results in the
pop %rsp not being detected as a restore of cfa->base so it will turn
into a regular POP and offset the stack, resulting in:

  kernel/softirq.o: warning: objtool: do_softirq()+0xdb: return with modified stack frame

Therefore, have "mov %[tos], %rsp" act like a PUSH (it sorta is
anyway) to balance the things out. We're not too concerned with the
actual stack_size for frame-pointer builds, since we don't generate
ORC data for them anyway.
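
A compact way to read the bookkeeping (annotations mine, illustrative):

	mov %rsp, (%[tos])	# save old SP at the new top-of-stack
	mov %[tos], %rsp	# switch SP; with FRAME_POINTER=y treat as
				# a PUSH: stack_size += 8
	...
	pop %rsp		# previously decoded as a plain POP,
				# stack_size -= 8, now balanced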

Fixes: aafeb14e9da2 ("objtool: Support stack-swizzle")
Reported-by: kernel test robot 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Thomas Gleixner 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/yc6uc+rc9kkmq...@hirez.programming.kicks-ass.net
---
 tools/objtool/check.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8e74210..2087974 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1983,6 +1983,20 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
}
}
 
+   else if (op->dest.reg == CFI_SP &&
+cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+cfi->vals[op->src.reg].offset == cfa->offset) {
+
+   /*
+* The same stack swizzle case 2) as above. But
+* because we can't change cfa->base, case 3)
+* will become a regular POP. Pretend we're a
+* PUSH so things don't go unbalanced.
+*/
+   cfi->stack_size += 8;
+   }
+
+
break;
 
case OP_SRC_ADD:


[tip: objtool/core] objtool: Fix stack-swizzle for FRAME_POINTER=y

2021-02-22 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the objtool/core branch of tip:

Commit-ID: 23e34c5988088b8bb4c55905973ca76114cb33ee
Gitweb:
https://git.kernel.org/tip/23e34c5988088b8bb4c55905973ca76114cb33ee
Author:Peter Zijlstra 
AuthorDate:Thu, 18 Feb 2021 17:14:10 +01:00
Committer: Peter Zijlstra 
CommitterDate: Mon, 22 Feb 2021 12:05:18 +01:00

objtool: Fix stack-swizzle for FRAME_POINTER=y

When objtool encounters the stack-swizzle:

mov %rsp, (%[tos])
mov %[tos], %rsp
...
pop %rsp

Inside a FRAME_POINTER=y build, things go a little screwy because
clearly we're not adjusting the cfa->base. This then results in the
pop %rsp not being detected as a restore of cfa->base so it will turn
into a regular POP and offset the stack, resulting in:

  kernel/softirq.o: warning: objtool: do_softirq()+0xdb: return with modified stack frame

Therefore, have "mov %[tos], %rsp" act like a PUSH (it sorta is
anyway) to balance the things out. We're not too concerned with the
actual stack_size for frame-pointer builds, since we don't generate
ORC data for them anyway.

Fixes: aafeb14e9da2 ("objtool: Support stack-swizzle")
Reported-by: kernel test robot 
Signed-off-by: Peter Zijlstra (Intel) 
Acked-by: Josh Poimboeuf 
Link: https://lkml.kernel.org/r/yc6uc+rc9kkmq...@hirez.programming.kicks-ass.net
---
 tools/objtool/check.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 62cd211..d7f1496 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1983,6 +1983,20 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
}
}
 
+   else if (op->dest.reg == CFI_SP &&
+cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+cfi->vals[op->src.reg].offset == cfa->offset) {
+
+   /*
+* The same stack swizzle case 2) as above. But
+* because we can't change cfa->base, case 3)
+* will become a regular POP. Pretend we're a
+* PUSH so things don't go unbalanced.
+*/
+   cfi->stack_size += 8;
+   }
+
+
break;
 
case OP_SRC_ADD:


[tip: sched/core] rbtree, perf: Use new rbtree helpers

2021-02-17 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: a3b89864554bbce1594b7abdb5739fc708c1ca95
Gitweb:
https://git.kernel.org/tip/a3b89864554bbce1594b7abdb5739fc708c1ca95
Author:Peter Zijlstra 
AuthorDate:Wed, 29 Apr 2020 17:05:15 +02:00
Committer: Ingo Molnar 
CommitterDate: Wed, 17 Feb 2021 14:07:48 +01:00

rbtree, perf: Use new rbtree helpers

Reduce rbtree boiler plate by using the new helpers.

One noteworthy change is unification of the various (partial) compare
functions. We construct a subtree match by forcing the sub-order to
always match, see __group_cmp().

Due to 'const' we had to touch cgroup_id().
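
For readers following along: the subtree-match helpers sit past the point
where the diff below is truncated. Reconstructed from the full patch (a
sketch, so treat the details with care):

	struct __group_key {
		int cpu;
		struct cgroup *cgroup;
	};

	#define __node_2_pe(node) \
		rb_entry((node), struct perf_event, group_node)

	static inline int __group_cmp(const void *key, const struct rb_node *node)
	{
		const struct __group_key *a = key;
		const struct perf_event *b = __node_2_pe(node);

		/* partial/subtree match: @a->cpu, @a->cgroup; ignore: group_index */
		return perf_event_groups_cmp(a->cpu, a->cgroup, b->group_index, b);
	}

Passing the node's own group_index back in as the "left" index makes that
component compare equal by construction, so a lookup matches the whole
{cpu, cgroup} subtree regardless of sub-order.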

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Tejun Heo 
Acked-by: Davidlohr Bueso 
---
 include/linux/cgroup.h |   4 +-
 kernel/events/core.c   | 195 ++--
 2 files changed, 92 insertions(+), 107 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 451c2d2..4f2f79d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -307,7 +307,7 @@ void css_task_iter_end(struct css_task_iter *it);
  * Inline functions.
  */
 
-static inline u64 cgroup_id(struct cgroup *cgrp)
+static inline u64 cgroup_id(const struct cgroup *cgrp)
 {
return cgrp->kn->id;
 }
@@ -701,7 +701,7 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen);
 struct cgroup_subsys_state;
 struct cgroup;
 
-static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; }
+static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
 static inline void css_get(struct cgroup_subsys_state *css) {}
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 55d1879..3d89096 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1595,50 +1595,91 @@ static void perf_event_groups_init(struct perf_event_groups *groups)
groups->index = 0;
 }
 
+static inline struct cgroup *event_cgroup(const struct perf_event *event)
+{
+   struct cgroup *cgroup = NULL;
+
+#ifdef CONFIG_CGROUP_PERF
+   if (event->cgrp)
+   cgroup = event->cgrp->css.cgroup;
+#endif
+
+   return cgroup;
+}
+
 /*
  * Compare function for event groups;
  *
  * Implements complex key that first sorts by CPU and then by virtual index
  * which provides ordering when rotating groups for the same CPU.
  */
-static bool
-perf_event_groups_less(struct perf_event *left, struct perf_event *right)
+static __always_inline int
+perf_event_groups_cmp(const int left_cpu, const struct cgroup *left_cgroup,
+ const u64 left_group_index, const struct perf_event *right)
 {
-   if (left->cpu < right->cpu)
-   return true;
-   if (left->cpu > right->cpu)
-   return false;
+   if (left_cpu < right->cpu)
+   return -1;
+   if (left_cpu > right->cpu)
+   return 1;
 
 #ifdef CONFIG_CGROUP_PERF
-   if (left->cgrp != right->cgrp) {
-   if (!left->cgrp || !left->cgrp->css.cgroup) {
-   /*
-* Left has no cgroup but right does, no cgroups come
-* first.
-*/
-   return true;
-   }
-   if (!right->cgrp || !right->cgrp->css.cgroup) {
-   /*
-* Right has no cgroup but left does, no cgroups come
-* first.
-*/
-   return false;
-   }
-   /* Two dissimilar cgroups, order by id. */
-   if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id)
-   return true;
+   {
+   const struct cgroup *right_cgroup = event_cgroup(right);
 
-   return false;
+   if (left_cgroup != right_cgroup) {
+   if (!left_cgroup) {
+   /*
+* Left has no cgroup but right does, no
+* cgroups come first.
+*/
+   return -1;
+   }
+   if (!right_cgroup) {
+   /*
+* Right has no cgroup but left does, no
+* cgroups come first.
+*/
+   return 1;
+   }
+   /* Two dissimilar cgroups, order by id. */
+   if (cgroup_id(left_cgroup) < cgroup_id(right_cgroup))
+   return -1;
+
+   return 1;
+   }
}
 #endif
 
-   if (left->group_index < right->group_index)
-   

[tip: sched/core] rbtree, timerqueue: Use rb_add_cached()

2021-02-17 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 798172b1374e28ecf687d6662fc5fdaec5c65385
Gitweb:
https://git.kernel.org/tip/798172b1374e28ecf687d6662fc5fdaec5c65385
Author:Peter Zijlstra 
AuthorDate:Wed, 29 Apr 2020 17:07:53 +02:00
Committer: Ingo Molnar 
CommitterDate: Wed, 17 Feb 2021 14:08:01 +01:00

rbtree, timerqueue: Use rb_add_cached()

Reduce rbtree boiler plate by using the new helpers.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Davidlohr Bueso 
---
 lib/timerqueue.c | 28 +---
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index c527109..cdb9c76 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -14,6 +14,14 @@
 #include 
 #include 
 
+#define __node_2_tq(_n) \
+   rb_entry((_n), struct timerqueue_node, node)
+
+static inline bool __timerqueue_less(struct rb_node *a, const struct rb_node *b)
+{
+   return __node_2_tq(a)->expires < __node_2_tq(b)->expires;
+}
+
 /**
  * timerqueue_add - Adds timer to timerqueue.
  *
@@ -26,28 +34,10 @@
  */
 bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 {
-   struct rb_node **p = &head->rb_root.rb_root.rb_node;
-   struct rb_node *parent = NULL;
-   struct timerqueue_node *ptr;
-   bool leftmost = true;
-
/* Make sure we don't add nodes that are already added */
WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
 
-   while (*p) {
-   parent = *p;
-   ptr = rb_entry(parent, struct timerqueue_node, node);
-   if (node->expires < ptr->expires) {
-   p = &(*p)->rb_left;
-   } else {
-   p = &(*p)->rb_right;
-   leftmost = false;
-   }
-   }
-   rb_link_node(&node->node, parent, p);
-   rb_insert_color_cached(&node->node, &head->rb_root, leftmost);
-
-   return leftmost;
+   return rb_add_cached(&node->node, &head->rb_root, __timerqueue_less);
 }
 EXPORT_SYMBOL_GPL(timerqueue_add);
 


[tip: sched/core] rbtree, uprobes: Use rbtree helpers

2021-02-17 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: a905e84e64083a0ee701f61810badee234050825
Gitweb:
https://git.kernel.org/tip/a905e84e64083a0ee701f61810badee234050825
Author:Peter Zijlstra 
AuthorDate:Wed, 29 Apr 2020 17:06:27 +02:00
Committer: Ingo Molnar 
CommitterDate: Wed, 17 Feb 2021 14:07:52 +01:00

rbtree, uprobes: Use rbtree helpers

Reduce rbtree boilerplate by using the new helpers.

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Acked-by: Davidlohr Bueso 
---
 kernel/events/uprobes.c | 80 +++-
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index bf9edd8..fd5160d 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -613,41 +613,56 @@ static void put_uprobe(struct uprobe *uprobe)
}
 }
 
-static int match_uprobe(struct uprobe *l, struct uprobe *r)
+static __always_inline
+int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset,
+  const struct uprobe *r)
 {
-   if (l->inode < r->inode)
+   if (l_inode < r->inode)
return -1;
 
-   if (l->inode > r->inode)
+   if (l_inode > r->inode)
return 1;
 
-   if (l->offset < r->offset)
+   if (l_offset < r->offset)
return -1;
 
-   if (l->offset > r->offset)
+   if (l_offset > r->offset)
return 1;
 
return 0;
 }
 
+#define __node_2_uprobe(node) \
+   rb_entry((node), struct uprobe, rb_node)
+
+struct __uprobe_key {
+   struct inode *inode;
+   loff_t offset;
+};
+
+static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b)
+{
+   const struct __uprobe_key *a = key;
+   return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b));
+}
+
+static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b)
+{
+   struct uprobe *u = __node_2_uprobe(a);
+   return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b));
+}
+
 static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
 {
-   struct uprobe u = { .inode = inode, .offset = offset };
-   struct rb_node *n = uprobes_tree.rb_node;
-   struct uprobe *uprobe;
-   int match;
+   struct __uprobe_key key = {
+   .inode = inode,
+   .offset = offset,
+   };
+   struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
 
-   while (n) {
-   uprobe = rb_entry(n, struct uprobe, rb_node);
-   match = match_uprobe(&u, uprobe);
-   if (!match)
-   return get_uprobe(uprobe);
+   if (node)
+   return __node_2_uprobe(node);
 
-   if (match < 0)
-   n = n->rb_left;
-   else
-   n = n->rb_right;
-   }
return NULL;
 }
 
@@ -668,32 +683,15 @@ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
 
 static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
 {
-   struct rb_node **p = &uprobes_tree.rb_node;
-   struct rb_node *parent = NULL;
-   struct uprobe *u;
-   int match;
+   struct rb_node *node;
 
-   while (*p) {
-   parent = *p;
-   u = rb_entry(parent, struct uprobe, rb_node);
-   match = match_uprobe(uprobe, u);
-   if (!match)
-   return get_uprobe(u);
node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp);
+   if (node)
+   return get_uprobe(__node_2_uprobe(node));
 
-   if (match < 0)
-   p = &parent->rb_left;
-   else
-   p = &parent->rb_right;
-
-   }
-
-   u = NULL;
-   rb_link_node(&uprobe->rb_node, parent, p);
-   rb_insert_color(&uprobe->rb_node, &uprobes_tree);
/* get access + creation ref */
refcount_set(&uprobe->ref, 2);
-
-   return u;
+   return NULL;
 }
 
 /*


[tip: sched/core] static_call/x86: Add __static_call_return0()

2021-02-17 Thread tip-bot2 for Peter Zijlstra
The following commit has been merged into the sched/core branch of tip:

Commit-ID: 3f2a8fc4b15de18644e8a80a09edda168676e22c
Gitweb:
https://git.kernel.org/tip/3f2a8fc4b15de18644e8a80a09edda168676e22c
Author:Peter Zijlstra 
AuthorDate:Mon, 18 Jan 2021 15:12:16 +01:00
Committer: Ingo Molnar 
CommitterDate: Wed, 17 Feb 2021 14:08:43 +01:00

static_call/x86: Add __static_call_return0()

Provide a stub function that return 0 and wire up the static call site
patching to replace the CALL with a single 5 byte instruction that
clears %RAX, the return value register.

The function can be cast to any function pointer type that has a
single %RAX return (including pointers). Also provide a version that
returns an int for convenience. We are clearing the entire %RAX register
in any case, whether the return value is 32 or 64 bits, since %RAX is
always a scratch register anyway.
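
As an illustration of the consumer-side idiom (hypothetical names; only
the cast-to-the-target-type pattern is the point here):

	DEFINE_STATIC_CALL(my_hook, my_hook_fn);	/* int my_hook_fn(int) */

	/* Turn the hook off: its call sites become 'xor %rax, %rax'. */
	static_call_update(my_hook, (typeof(&my_hook_fn))__static_call_return0);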

Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Frederic Weisbecker 
Signed-off-by: Peter Zijlstra (Intel) 
Signed-off-by: Ingo Molnar 
Link: https://lkml.kernel.org/r/20210118141223.123667-2-frede...@kernel.org
---
 arch/x86/kernel/static_call.c | 17 +++--
 include/linux/static_call.h   | 12 
 kernel/static_call.c  |  5 +
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ca9a380..9442c41 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -11,14 +11,26 @@ enum insn_type {
RET = 3,  /* tramp / site cond-tail-call */
 };
 
+/*
+ * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax
+ * The REX.W cancels the effect of any data16.
+ */
+static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 };
+
 static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
 {
+   const void *emulate = NULL;
int size = CALL_INSN_SIZE;
const void *code;
 
switch (type) {
case CALL:
code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
+   if (func == &__static_call_return0) {
+   emulate = code;
+   code = &xor5rax;
+   }
+
break;
 
case NOP:
@@ -41,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
if (unlikely(system_state == SYSTEM_BOOTING))
return text_poke_early(insn, code, size);
 
-   text_poke_bp(insn, code, size, NULL);
+   text_poke_bp(insn, code, size, emulate);
 }
 
 static void __static_call_validate(void *insn, bool tail)
@@ -54,7 +66,8 @@ static void __static_call_validate(void *insn, bool tail)
return;
} else {
if (opcode == CALL_INSN_OPCODE ||
-   !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5))
+   !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) ||
+   !memcmp(insn, xor5rax, 5))
return;
}
 
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index a2c0645..bd6735d 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -142,6 +142,8 @@ extern void __static_call_update(struct static_call_key *key, void *tramp, void *func);
 extern int static_call_mod_init(struct module *mod);
 extern int static_call_text_reserved(void *start, void *end);
 
+extern long __static_call_return0(void);
+
 #define DEFINE_STATIC_CALL(name, _func)				\
DECLARE_STATIC_CALL(name, _func);   \
struct static_call_key STATIC_CALL_KEY(name) = {\
@@ -206,6 +208,11 @@ static inline int static_call_text_reserved(void *start, void *end)
return 0;
 }
 
+static inline long __static_call_return0(void)
+{
+   return 0;
+}
+
 #define EXPORT_STATIC_CALL(name)   \
EXPORT_SYMBOL(STATIC_CALL_KEY(name));   \
EXPORT_SYMBOL(STATIC_CALL_TRAMP(name))
@@ -222,6 +229,11 @@ struct static_call_key {
void *func;
 };
 
+static inline long __static_call_return0(void)
+{
+   return 0;
+}
+
 #define DEFINE_STATIC_CALL(name, _func)				\
DECLARE_STATIC_CALL(name, _func);   \
struct static_call_key STATIC_CALL_KEY(name) = {\
diff --git a/kernel/static_call.c b/kernel/static_call.c
index 84565c2..0bc11b5 100644
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -438,6 +438,11 @@ int __init static_call_init(void)
 }
 early_initcall(static_call_init);
 
+long __static_call_return0(void)
+{
+   return 0;
+}
+
 #ifdef CONFIG_STATIC_CALL_SELFTEST
 
 static int func_a(int x)

